summaryrefslogtreecommitdiff
path: root/arch/arm64/include/asm/fpsimdmacros.h
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/include/asm/fpsimdmacros.h')
-rw-r--r--arch/arm64/include/asm/fpsimdmacros.h119
1 files changed, 112 insertions, 7 deletions
diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
index 059204477ce6..cda81d009c9b 100644
--- a/arch/arm64/include/asm/fpsimdmacros.h
+++ b/arch/arm64/include/asm/fpsimdmacros.h
@@ -93,7 +93,14 @@
.endif
.endm
+.macro _sme_check_wv v
+ .if (\v) < 12 || (\v) > 15
+ .error "Bad vector select register \v."
+ .endif
+.endm
+
/* SVE instruction encodings for non-SVE-capable assemblers */
+/* (pre binutils 2.28, all kernel capable clang versions support SVE) */
/* STR (vector): STR Z\nz, [X\nxbase, #\offset, MUL VL] */
.macro _sve_str_v nz, nxbase, offset=0
@@ -173,6 +180,68 @@
| (\np)
.endm
+/* SME instruction encodings for non-SME-capable assemblers */
+/* (pre binutils 2.38/LLVM 13) */
+
+/* RDSVL X\nx, #\imm */
+.macro _sme_rdsvl nx, imm
+ _check_general_reg \nx
+ _check_num (\imm), -0x20, 0x1f
+ .inst 0x04bf5800 \
+ | (\nx) \
+ | (((\imm) & 0x3f) << 5)
+.endm
+
+/*
+ * STR (vector from ZA array):
+ * STR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
+ */
+.macro _sme_str_zav nw, nxbase, offset=0
+ _sme_check_wv \nw
+ _check_general_reg \nxbase
+ _check_num (\offset), -0x100, 0xff
+ .inst 0xe1200000 \
+ | (((\nw) & 3) << 13) \
+ | ((\nxbase) << 5) \
+ | ((\offset) & 7)
+.endm
+
+/*
+ * LDR (vector to ZA array):
+ * LDR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
+ */
+.macro _sme_ldr_zav nw, nxbase, offset=0
+ _sme_check_wv \nw
+ _check_general_reg \nxbase
+ _check_num (\offset), -0x100, 0xff
+ .inst 0xe1000000 \
+ | (((\nw) & 3) << 13) \
+ | ((\nxbase) << 5) \
+ | ((\offset) & 7)
+.endm
+
+/*
+ * LDR (ZT0)
+ *
+ * LDR ZT0, nx
+ */
+.macro _ldr_zt nx
+ _check_general_reg \nx
+ .inst 0xe11f8000 \
+ | (\nx << 5)
+.endm
+
+/*
+ * STR (ZT0)
+ *
+ * STR ZT0, nx
+ */
+.macro _str_zt nx
+ _check_general_reg \nx
+ .inst 0xe13f8000 \
+ | (\nx << 5)
+.endm
+
.macro __for from:req, to:req
.if (\from) == (\to)
_for__body %\from
@@ -207,6 +276,17 @@
921:
.endm
+/* Update SMCR_EL1.LEN with the new VQ */
+.macro sme_load_vq xvqminus1, xtmp, xtmp2
+ mrs_s \xtmp, SYS_SMCR_EL1
+ bic \xtmp2, \xtmp, SMCR_ELx_LEN_MASK
+ orr \xtmp2, \xtmp2, \xvqminus1
+ cmp \xtmp2, \xtmp
+ b.eq 921f
+ msr_s SYS_SMCR_EL1, \xtmp2 //self-synchronising
+921:
+.endm
+
/* Preserve the first 128-bits of Znz and zero the rest. */
.macro _sve_flush_z nz
_sve_check_zreg \nz
@@ -216,28 +296,36 @@
.macro sve_flush_z
_for n, 0, 31, _sve_flush_z \n
.endm
-.macro sve_flush_p_ffr
+.macro sve_flush_p
_for n, 0, 15, _sve_pfalse \n
+.endm
+.macro sve_flush_ffr
_sve_wrffr 0
.endm
-.macro sve_save nxbase, xpfpsr, nxtmp
+.macro sve_save nxbase, xpfpsr, save_ffr, nxtmp
_for n, 0, 31, _sve_str_v \n, \nxbase, \n - 34
_for n, 0, 15, _sve_str_p \n, \nxbase, \n - 16
+ cbz \save_ffr, 921f
_sve_rdffr 0
+ b 922f
+921:
+ _sve_pfalse 0 // Zero out FFR
+922:
_sve_str_p 0, \nxbase
_sve_ldr_p 0, \nxbase, -16
-
mrs x\nxtmp, fpsr
str w\nxtmp, [\xpfpsr]
mrs x\nxtmp, fpcr
str w\nxtmp, [\xpfpsr, #4]
.endm
-.macro __sve_load nxbase, xpfpsr, nxtmp
+.macro sve_load nxbase, xpfpsr, restore_ffr, nxtmp
_for n, 0, 31, _sve_ldr_v \n, \nxbase, \n - 34
+ cbz \restore_ffr, 921f
_sve_ldr_p 0, \nxbase
_sve_wrffr 0
+921:
_for n, 0, 15, _sve_ldr_p \n, \nxbase, \n - 16
ldr w\nxtmp, [\xpfpsr]
@@ -246,7 +334,24 @@
msr fpcr, x\nxtmp
.endm
-.macro sve_load nxbase, xpfpsr, xvqminus1, nxtmp, xtmp2
- sve_load_vq \xvqminus1, x\nxtmp, \xtmp2
- __sve_load \nxbase, \xpfpsr, \nxtmp
+.macro sme_save_za nxbase, xvl, nw
+ mov w\nw, #0
+
+423:
+ _sme_str_zav \nw, \nxbase
+ add x\nxbase, x\nxbase, \xvl
+ add x\nw, x\nw, #1
+ cmp \xvl, x\nw
+ bne 423b
+.endm
+
+.macro sme_load_za nxbase, xvl, nw
+ mov w\nw, #0
+
+423:
+ _sme_ldr_zav \nw, \nxbase
+ add x\nxbase, x\nxbase, \xvl
+ add x\nw, x\nw, #1
+ cmp \xvl, x\nw
+ bne 423b
.endm