Memory copy, reads and writes non-temporal
These instructions copy a requested number of bytes in memory from a source address to a destination address. The prologue, main, and epilogue instructions are expected to be run in succession and to appear consecutively in memory: CPYPN, then CPYMN, and then CPYEN.
CPYPN performs some preconditioning of the arguments suitable for using the CPYMN instruction, and copies an IMPLEMENTATION DEFINED portion of the requested number of bytes. CPYMN copies a further IMPLEMENTATION DEFINED portion of the remaining bytes. CPYEN copies any final remaining bytes.
The ability to copy an IMPLEMENTATION DEFINED number of bytes allows an implementation to optimize how the bytes being copied are divided between the different instructions.
For more information on exceptions specific to memory copy instructions, see Memory Copy and Memory Set exceptions.
The architecture supports two algorithms for the memory copy: option A and option B. Which algorithm is used is IMPLEMENTATION DEFINED.
Portable software should not assume that the choice of algorithm is constant.
For CPYPN:
If Xn[63:55] != '000000000', the copy size Xn is saturated to 0x007FFFFFFFFFFFFF.
After saturation is performed, the direction of the memory copy is based on the following:
If (Xs[55:0] > Xd[55:0]) and (Xd[55:0] + saturated copy size) > Xs[55:0], then the direction is forward.
If (Xs[55:0] < Xd[55:0]) and (Xs[55:0] + saturated copy size) > Xd[55:0], then the direction is backward.
Otherwise, the direction is an IMPLEMENTATION DEFINED choice between forward and backward.
On completion of CPYPN, option A: PSTATE.{N,Z,C,V} are set to {0,0,0,0}.
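On completion of CPYPN, option B: PSTATE.{N,Z,C,V} are set to {0,0,1,0} if the copy is in the forward direction, and to {1,0,1,0} if the copy is in the backward direction.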
For CPYMN, option A, when PSTATE.C = 0:
For CPYMN, option B, when PSTATE.C = 1:
For CPYEN, option A, when PSTATE.C = 0:
If the copy is in the backward direction (Xn holds a positive number), then:
On completion of the instruction:
For CPYEN, option B, when PSTATE.C = 1:
If the copy is in the backward direction (PSTATE.N == 1), then:
On completion of the instruction:
| 31:30 | 29 | 28 | 27 | 26 | 25 | 24 | 23:22 | 21 | 20:16 | 15:12 | 11 | 10 | 9:5 | 4:0 |
| sz | 0 | 1 | 1 | 1 | 0 | 1 | op1 | 0 | Rs | 1 1 0 0 | 0 | 1 | Rn | Rd |
| | | | | o0 | | | | | | op2 | | | | |
// Decode for CPYPN, CPYMN, and CPYEN.
// The encoding is UNDEFINED unless FEAT_MOPS is implemented and sz is '00'.
if !IsFeatureImplemented(FEAT_MOPS) || sz != '00' then
    EndOfDecode(Decode_UNDEF);
end;

// op2 supplies the four option bits. Bits [3] and [2] select non-temporal
// behaviour for the read and the write side respectively; bits [1] and [0]
// are consumed later, when the access privilege is worked out.
let options : bits(4) = op2;
let rnontemporal : boolean = options[3] == '1';
let wnontemporal : boolean = options[2] == '1';

var memcpy : CPYParams;

// op1 selects which of the three stages this instruction is.
// NOTE(review): op1 == '11' has no arm here; presumably that encoding is
// decoded as a different instruction - confirm against the encoding index.
case op1 of
    when '00' => memcpy.stage = MOPSStage_Prologue;
    when '01' => memcpy.stage = MOPSStage_Main;
    when '10' => memcpy.stage = MOPSStage_Epilogue;
end;

// Register numbers of the size (n), source (s), and destination (d) operands.
memcpy.n = UInt(Rn);
memcpy.s = UInt(Rs);
memcpy.d = UInt(Rd);
For information about the CONSTRAINED UNPREDICTABLE behavior of this instruction, see Architectural Constraints on UNPREDICTABLE behaviors, and particularly Memory Copy and Memory Set CPY* and Crossing a page boundary with different memory types or Shareability attributes.
// Execute for CPYPN, CPYMN, and CPYEN.
//
// Reads the destination, source, and size registers named by memcpy.d,
// memcpy.s, and memcpy.n, copies the portion of the request that belongs to
// the current stage in blocks of B bytes, then hands the updated state to
// UpdateCpyRegisters. If a block is not copied in full, the loop stops and the
// fault is reported after the registers have been updated. Only the prologue
// stage writes PSTATE.[N,Z,C,V].
//
// Each statement is on its own line so that every "//" comment ends at the end
// of its own line and cannot swallow the statements that follow it.

CheckMOPSEnabled();
CheckCPYConstrainedUnpredictable(memcpy.n, memcpy.d, memcpy.s);

memcpy.nzcv = PSTATE.[N,Z,C,V];
memcpy.toaddress = X{64}(memcpy.d);
memcpy.fromaddress = X{64}(memcpy.s);

// The prologue reads the size register as unsigned; the main and epilogue
// stages read it as signed, because a negative size encodes a forward copy
// (see the computation of memcpy.forward below).
if memcpy.stage == MOPSStage_Prologue then
    memcpy.cpysize = UInt(X{64}(memcpy.n));
else
    memcpy.cpysize = SInt(X{64}(memcpy.n));
end;

memcpy.implements_option_a = CPYOptionA();

// options[1] and options[0] select, for reads and writes respectively, whether
// the privilege comes from AArch64_IsUnprivAccessPriv() or from the current
// Exception level.
let rprivileged : boolean = (if options[1] == '1' then AArch64_IsUnprivAccessPriv()
                             else PSTATE.EL != EL0);
let wprivileged : boolean = (if options[0] == '1' then AArch64_IsUnprivAccessPriv()
                             else PSTATE.EL != EL0);

let raccdesc : AccessDescriptor = CreateAccDescMOPS(MemOp_LOAD, rprivileged, rnontemporal);
let waccdesc : AccessDescriptor = CreateAccDescMOPS(MemOp_STORE, wprivileged, wnontemporal);

if memcpy.stage == MOPSStage_Prologue then
    // Saturate the requested size before the direction is chosen.
    if memcpy.cpysize > ArchMaxMOPSCPYSize then
        memcpy.cpysize = ArchMaxMOPSCPYSize;
    end;

    memcpy.forward = IsMemCpyForward(memcpy);

    if memcpy.implements_option_a then
        memcpy.nzcv = '0000';
        if memcpy.forward then
            // Copy in the forward direction offsets the arguments.
            memcpy.toaddress = memcpy.toaddress + memcpy.cpysize;
            memcpy.fromaddress = memcpy.fromaddress + memcpy.cpysize;
            memcpy.cpysize = 0 - memcpy.cpysize;
        end;
    else
        if !memcpy.forward then
            // Copy in the reverse direction offsets the arguments.
            memcpy.toaddress = memcpy.toaddress + memcpy.cpysize;
            memcpy.fromaddress = memcpy.fromaddress + memcpy.cpysize;
            memcpy.nzcv = '1010';
        else
            memcpy.nzcv = '0010';
        end;
    end;
end;

memcpy.stagecpysize = MemCpyStageSize(memcpy);

if memcpy.stage != MOPSStage_Prologue then
    // Main and epilogue recover the direction from the state left by the
    // prologue: a negative size, or (option B only) nzcv[3] being '0'.
    memcpy.forward = memcpy.cpysize < 0 || (!memcpy.implements_option_a &&
                                            memcpy.nzcv[3] == '0');
    CheckMemCpyParams(memcpy, options);
end;

var copied : integer;
var iswrite : boolean;
var memaddrdesc : AddressDescriptor;
var memstatus : PhysMemRetStatus;
var fault : boolean = FALSE;
var B : MOPSBlockSize = 0;

if memcpy.implements_option_a then
    // Option A: the addresses stay fixed and cpysize is used as the offset.
    while memcpy.stagecpysize != 0 && !fault looplimit ArchMaxMOPSCPYSize do
        // IMP DEF selection of the block size that is worked on. While many
        // implementations might make this constant, that is not assumed.
        B = CPYSizeChoice(memcpy);
        if memcpy.forward then
            assert B <= -1 * memcpy.stagecpysize;
            (copied, iswrite, memaddrdesc, memstatus) = MemCpyBytes(
                memcpy.toaddress + memcpy.cpysize,
                memcpy.fromaddress + memcpy.cpysize,
                memcpy.forward, B, raccdesc, waccdesc);
            if copied != B then
                fault = TRUE;
            else
                memcpy.cpysize = memcpy.cpysize + B;
                memcpy.stagecpysize = memcpy.stagecpysize + B;
            end;
        else
            assert B <= memcpy.stagecpysize;
            // The sizes are reduced before the copy so that they give the
            // offset of the block; they are restored if the copy faults.
            memcpy.cpysize = memcpy.cpysize - B;
            memcpy.stagecpysize = memcpy.stagecpysize - B;
            (copied, iswrite, memaddrdesc, memstatus) = MemCpyBytes(
                memcpy.toaddress + memcpy.cpysize,
                memcpy.fromaddress + memcpy.cpysize,
                memcpy.forward, B, raccdesc, waccdesc);
            if copied != B then
                fault = TRUE;
                memcpy.cpysize = memcpy.cpysize + B;
                memcpy.stagecpysize = memcpy.stagecpysize + B;
            end;
        end;
    end;
else
    // Option B: the addresses themselves move and the sizes count down.
    while memcpy.stagecpysize > 0 && !fault looplimit ArchMaxMOPSCPYSize do
        // IMP DEF selection of the block size that is worked on. While many
        // implementations might make this constant, that is not assumed.
        B = CPYSizeChoice(memcpy);
        assert B <= memcpy.stagecpysize;
        if memcpy.forward then
            (copied, iswrite, memaddrdesc, memstatus) = MemCpyBytes(
                memcpy.toaddress, memcpy.fromaddress,
                memcpy.forward, B, raccdesc, waccdesc);
            if copied != B then
                fault = TRUE;
            else
                memcpy.fromaddress = memcpy.fromaddress + B;
                memcpy.toaddress = memcpy.toaddress + B;
            end;
        else
            (copied, iswrite, memaddrdesc, memstatus) = MemCpyBytes(
                memcpy.toaddress - B, memcpy.fromaddress - B,
                memcpy.forward, B, raccdesc, waccdesc);
            if copied != B then
                fault = TRUE;
            else
                memcpy.fromaddress = memcpy.fromaddress - B;
                memcpy.toaddress = memcpy.toaddress - B;
            end;
        end;
        if !fault then
            memcpy.cpysize = memcpy.cpysize - B;
            memcpy.stagecpysize = memcpy.stagecpysize - B;
        end;
    end;
end;

// The register state is passed to UpdateCpyRegisters before any fault is
// reported.
UpdateCpyRegisters(memcpy, fault, copied);

if fault then
    if IsFault(memaddrdesc) then
        AArch64_Abort(memaddrdesc.fault);
    else
        let accdesc : AccessDescriptor = if iswrite then waccdesc else raccdesc;
        HandleExternalAbort(memstatus, iswrite, memaddrdesc, B, accdesc);
    end;
end;

// Only the prologue writes the flags.
if memcpy.stage == MOPSStage_Prologue then
    PSTATE.[N,Z,C,V] = memcpy.nzcv;
end;
2026-03_rel 2026-03-26 20:48:11
Copyright © 2010-2026 Arm Limited or its affiliates. All rights reserved. This document is Non-Confidential.