Store a single-element structure from one lane of one register
This instruction stores the specified element of a SIMD&FP register to memory.
Depending on the settings in the CPACR_EL1, CPTR_EL2, and CPTR_EL3 registers, and the current Security state and Exception level, an attempt to execute the instruction might be trapped.
It has encodings from 2 classes: No offset and Post-index.
| 31 | 30 | 29 | 28 | 27 | 26 | 25 | 24 | 23 | 22 | 21 | 20 | 19 | 18 | 17 | 16 | 15 | 14 | 13 | 12 | 11 | 10 | 9 | 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 |
| 0 | Q | 0 | 0 | 1 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | x | x | 0 | S | size | Rn | Rt | |||||||||
| L | R | o2 | opcode | ||||||||||||||||||||||||||||
// Decode, "No offset" class: the base register is not written back.
if !IsFeatureImplemented(FEAT_AdvSIMD) then
    EndOfDecode(Decode_UNDEF);
end;

// 't' is declared 'var' because the execute pseudocode advances it per structure element.
var t : integer = UInt(Rt);
let n : integer = UInt(Rn);

// This class has no Rm field; 'm' is only read by execute when 'wback' is TRUE.
let m : integer = ARBITRARY : integer;

let wback : boolean = FALSE;
let nontemporal : boolean = FALSE;

// Tag checking is requested unless the base is register 31 without writeback.
let tagchecked : boolean = wback || n != 31;
| 31 | 30 | 29 | 28 | 27 | 26 | 25 | 24 | 23 | 22 | 21 | 20 | 19 | 18 | 17 | 16 | 15 | 14 | 13 | 12 | 11 | 10 | 9 | 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 |
| 0 | Q | 0 | 0 | 1 | 1 | 0 | 1 | 1 | 0 | 0 | Rm | x | x | 0 | S | size | Rn | Rt | |||||||||||||
| L | R | opcode | |||||||||||||||||||||||||||||
// Decode, "Post-index" class: the base register is updated after the access.
if !IsFeatureImplemented(FEAT_AdvSIMD) then
    EndOfDecode(Decode_UNDEF);
end;

// 't' is declared 'var' because the execute pseudocode advances it per structure element.
var t : integer = UInt(Rt);
let n : integer = UInt(Rn);

// Post-index register number; execute treats m == 31 as "use the accumulated byte offset".
let m : integer = UInt(Rm);

let wback : boolean = TRUE;
let nontemporal : boolean = FALSE;

// With wback TRUE this always evaluates to TRUE; the expression is kept in its general form.
let tagchecked : boolean = wback || n != 31;
| <Vt> |
Is the name of the first or only SIMD&FP register to be transferred, encoded in the "Rt" field. |
| <Xn|SP> |
Is the 64-bit name of the general-purpose base register or stack pointer, encoded in the "Rn" field. |
| <Xm> |
Is the 64-bit name of the general-purpose post-index register, excluding XZR, encoded in the "Rm" field. |
// Shared decode: derive the element size, the lane index and the number of
// structure elements from the opcode/Q/S/size fields.
var scale : bits(2) = opcode[2:1];
let selem : integer = UInt(opcode[0]::R) + 1;
var replicate : boolean = FALSE;
var index : integer;

case scale of
    when '11' =>
        // load and replicate
        // NOTE(review): L appears to be fixed to '0' in both encodings of this
        // store instruction, so this arm would always end in UNDEF - confirm
        // against the encoding diagram.
        if L == '0' || S == '1' then
            EndOfDecode(Decode_UNDEF);
        end;
        scale = size;
        replicate = TRUE;
    when '00' =>
        index = UInt(Q::S::size);       // B[0-15]
    when '01' =>
        if size[0] == '1' then
            EndOfDecode(Decode_UNDEF);
        end;
        index = UInt(Q::S::size[1]);    // H[0-7]
    when '10' =>
        if size[1] == '1' then
            EndOfDecode(Decode_UNDEF);
        end;
        if size[0] == '0' then
            index = UInt(Q::S);         // S[0-3]
        else
            if S == '1' then
                EndOfDecode(Decode_UNDEF);
            end;
            index = UInt(Q);            // D[0-1]
            // Promote to the 64-bit element size so esize below becomes 64.
            scale = '11';
        end;
end;

// Register width selected by Q: 64 or 128 bits.
let datasize : integer{} = 64 << UInt(Q);
// Element width in bits: 8, 16, 32 or 64 depending on the final 'scale'.
let esize : integer{} = 8 << UInt(scale);
// Execute: for each of 'selem' consecutive registers starting at 't', write
// lane 'index' (esize bits) to consecutive memory locations starting at the
// base address, then optionally write back the updated base.
AArch64_CheckFPAdvSIMDEnabled();

var address : bits(64);
var eaddr : bits(64);
var rval : bits(128);
var element : bits(esize);
var offs : bits(64) = Zeros{64};    // running byte offset from the base address
let ebytes : integer{} = esize DIV 8;

let privileged : boolean = PSTATE.EL != EL0;
let accdesc : AccessDescriptor = CreateAccDescASIMD(MemOp_STORE, nontemporal, tagchecked, privileged);

// Register number 31 selects the stack pointer as base, which is alignment-checked first.
if n == 31 then
    CheckSPAlignment();
    address = SP{64}();
else
    address = X{64}(n);
end;

if replicate then
    // load and replicate to all elements
    // NOTE(review): 'replicate' is only set by the decode arm that rejects
    // L == '0'; for this store encoding the branch looks unreachable - confirm.
    for s = 0 to selem-1 do
        eaddr = AddressIncrement(address, offs, accdesc);
        element = Mem{esize}(eaddr, accdesc);
        // replicate to fill 128- or 64-bit register
        V{datasize}(t) = Replicate{datasize}(element);
        offs = offs + ebytes;
        t = (t + 1) MOD 32;
    end;
else
    // load/store one element per register
    for s = 0 to selem-1 do
        rval = V{128}(t);
        eaddr = AddressIncrement(address, offs, accdesc);
        // extract from one lane of 128-bit register
        Mem{esize}(eaddr, accdesc) = rval[index*:esize];
        offs = offs + ebytes;
        // Register numbers wrap around from 31 to 0.
        t = ( t + 1 ) MOD 32;
    end;
end;

if wback then
    // With m == 31 the base advances by the accumulated byte offset;
    // otherwise it advances by the value of register m.
    if m != 31 then
        offs = X{64}(m);
    end;
    address = AddressAdd(address, offs, accdesc);
    if n == 31 then
        SP{64}() = address;
    else
        X{64}(n) = address;
    end;
end;
This instruction is a data-independent-time instruction, as described in "About PSTATE.DIT".
2026-03_rel 2026-03-26 20:48:11
Copyright © 2010-2026 Arm Limited or its affiliates. All rights reserved. This document is Non-Confidential.