summaryrefslogtreecommitdiff
path: root/arch/arm64/include/asm/fpsimdmacros.h
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/include/asm/fpsimdmacros.h')
-rw-r--r--arch/arm64/include/asm/fpsimdmacros.h319
1 files changed, 306 insertions, 13 deletions
diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
index bbec599c96bd..cda81d009c9b 100644
--- a/arch/arm64/include/asm/fpsimdmacros.h
+++ b/arch/arm64/include/asm/fpsimdmacros.h
@@ -1,22 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* FP/SIMD state saving and restoring macros
*
* Copyright (C) 2012 ARM Ltd.
* Author: Catalin Marinas <catalin.marinas@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <asm/assembler.h>
+
.macro fpsimd_save state, tmpnr
stp q0, q1, [\state, #16 * 0]
stp q2, q3, [\state, #16 * 2]
@@ -40,6 +31,19 @@
str w\tmpnr, [\state, #16 * 2 + 4]
.endm
+.macro fpsimd_restore_fpcr state, tmp
+ /*
+ * Writes to fpcr may be self-synchronising, so avoid restoring
+ * the register if it hasn't changed.
+ */
+ mrs \tmp, fpcr
+ cmp \tmp, \state
+ b.eq 9999f
+ msr fpcr, \state
+9999:
+.endm
+
+/* Clobbers \state */
.macro fpsimd_restore state, tmpnr
ldp q0, q1, [\state, #16 * 0]
ldp q2, q3, [\state, #16 * 2]
@@ -60,5 +64,294 @@
ldr w\tmpnr, [\state, #16 * 2]
msr fpsr, x\tmpnr
ldr w\tmpnr, [\state, #16 * 2 + 4]
- msr fpcr, x\tmpnr
+ fpsimd_restore_fpcr x\tmpnr, \state
+.endm
+
+/* Sanity-check macros to help avoid encoding garbage instructions */
+
+.macro _check_general_reg nr
+ .if (\nr) < 0 || (\nr) > 30
+ .error "Bad register number \nr."
+ .endif
+.endm
+
+.macro _sve_check_zreg znr
+ .if (\znr) < 0 || (\znr) > 31
+ .error "Bad Scalable Vector Extension vector register number \znr."
+ .endif
+.endm
+
+.macro _sve_check_preg pnr
+ .if (\pnr) < 0 || (\pnr) > 15
+ .error "Bad Scalable Vector Extension predicate register number \pnr."
+ .endif
+.endm
+
+.macro _check_num n, min, max
+ .if (\n) < (\min) || (\n) > (\max)
+ .error "Number \n out of range [\min,\max]"
+ .endif
+.endm
+
+.macro _sme_check_wv v
+ .if (\v) < 12 || (\v) > 15
+ .error "Bad vector select register \v."
+ .endif
+.endm
+
+/* SVE instruction encodings for non-SVE-capable assemblers */
+/* (pre binutils 2.28, all kernel capable clang versions support SVE) */
+
+/* STR (vector): STR Z\nz, [X\nxbase, #\offset, MUL VL] */
+.macro _sve_str_v nz, nxbase, offset=0
+ _sve_check_zreg \nz
+ _check_general_reg \nxbase
+ _check_num (\offset), -0x100, 0xff
+ .inst 0xe5804000 \
+ | (\nz) \
+ | ((\nxbase) << 5) \
+ | (((\offset) & 7) << 10) \
+ | (((\offset) & 0x1f8) << 13)
+.endm
+
+/* LDR (vector): LDR Z\nz, [X\nxbase, #\offset, MUL VL] */
+.macro _sve_ldr_v nz, nxbase, offset=0
+ _sve_check_zreg \nz
+ _check_general_reg \nxbase
+ _check_num (\offset), -0x100, 0xff
+ .inst 0x85804000 \
+ | (\nz) \
+ | ((\nxbase) << 5) \
+ | (((\offset) & 7) << 10) \
+ | (((\offset) & 0x1f8) << 13)
+.endm
+
+/* STR (predicate): STR P\np, [X\nxbase, #\offset, MUL VL] */
+.macro _sve_str_p np, nxbase, offset=0
+ _sve_check_preg \np
+ _check_general_reg \nxbase
+ _check_num (\offset), -0x100, 0xff
+ .inst 0xe5800000 \
+ | (\np) \
+ | ((\nxbase) << 5) \
+ | (((\offset) & 7) << 10) \
+ | (((\offset) & 0x1f8) << 13)
+.endm
+
+/* LDR (predicate): LDR P\np, [X\nxbase, #\offset, MUL VL] */
+.macro _sve_ldr_p np, nxbase, offset=0
+ _sve_check_preg \np
+ _check_general_reg \nxbase
+ _check_num (\offset), -0x100, 0xff
+ .inst 0x85800000 \
+ | (\np) \
+ | ((\nxbase) << 5) \
+ | (((\offset) & 7) << 10) \
+ | (((\offset) & 0x1f8) << 13)
+.endm
+
+/* RDVL X\nx, #\imm */
+.macro _sve_rdvl nx, imm
+ _check_general_reg \nx
+ _check_num (\imm), -0x20, 0x1f
+ .inst 0x04bf5000 \
+ | (\nx) \
+ | (((\imm) & 0x3f) << 5)
+.endm
+
+/* RDFFR (unpredicated): RDFFR P\np.B */
+.macro _sve_rdffr np
+ _sve_check_preg \np
+ .inst 0x2519f000 \
+ | (\np)
+.endm
+
+/* WRFFR P\np.B */
+.macro _sve_wrffr np
+ _sve_check_preg \np
+ .inst 0x25289000 \
+ | ((\np) << 5)
+.endm
+
+/* PFALSE P\np.B */
+.macro _sve_pfalse np
+ _sve_check_preg \np
+ .inst 0x2518e400 \
+ | (\np)
+.endm
+
+/* SME instruction encodings for non-SME-capable assemblers */
+/* (pre binutils 2.38/LLVM 13) */
+
+/* RDSVL X\nx, #\imm */
+.macro _sme_rdsvl nx, imm
+ _check_general_reg \nx
+ _check_num (\imm), -0x20, 0x1f
+ .inst 0x04bf5800 \
+ | (\nx) \
+ | (((\imm) & 0x3f) << 5)
+.endm
+
+/*
+ * STR (vector from ZA array):
+ * STR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
+ */
+.macro _sme_str_zav nw, nxbase, offset=0
+ _sme_check_wv \nw
+ _check_general_reg \nxbase
+ _check_num (\offset), -0x100, 0xff
+ .inst 0xe1200000 \
+ | (((\nw) & 3) << 13) \
+ | ((\nxbase) << 5) \
+ | ((\offset) & 7)
+.endm
+
+/*
+ * LDR (vector to ZA array):
+ * LDR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
+ */
+.macro _sme_ldr_zav nw, nxbase, offset=0
+ _sme_check_wv \nw
+ _check_general_reg \nxbase
+ _check_num (\offset), -0x100, 0xff
+ .inst 0xe1000000 \
+ | (((\nw) & 3) << 13) \
+ | ((\nxbase) << 5) \
+ | ((\offset) & 7)
+.endm
+
+/*
+ * LDR (ZT0)
+ *
+ * LDR ZT0, nx
+ */
+.macro _ldr_zt nx
+ _check_general_reg \nx
+ .inst 0xe11f8000 \
+ | (\nx << 5)
+.endm
+
+/*
+ * STR (ZT0)
+ *
+ * STR ZT0, nx
+ */
+.macro _str_zt nx
+ _check_general_reg \nx
+ .inst 0xe13f8000 \
+ | (\nx << 5)
+.endm
+
+.macro __for from:req, to:req
+ .if (\from) == (\to)
+ _for__body %\from
+ .else
+ __for %\from, %((\from) + ((\to) - (\from)) / 2)
+ __for %((\from) + ((\to) - (\from)) / 2 + 1), %\to
+ .endif
+.endm
+
+.macro _for var:req, from:req, to:req, insn:vararg
+ .macro _for__body \var:req
+ .noaltmacro
+ \insn
+ .altmacro
+ .endm
+
+ .altmacro
+ __for \from, \to
+ .noaltmacro
+
+ .purgem _for__body
+.endm
+
+/* Update ZCR_EL1.LEN with the new VQ */
+.macro sve_load_vq xvqminus1, xtmp, xtmp2
+ mrs_s \xtmp, SYS_ZCR_EL1
+ bic \xtmp2, \xtmp, ZCR_ELx_LEN_MASK
+ orr \xtmp2, \xtmp2, \xvqminus1
+ cmp \xtmp2, \xtmp
+ b.eq 921f
+ msr_s SYS_ZCR_EL1, \xtmp2 //self-synchronising
+921:
+.endm
+
+/* Update SMCR_EL1.LEN with the new VQ */
+.macro sme_load_vq xvqminus1, xtmp, xtmp2
+ mrs_s \xtmp, SYS_SMCR_EL1
+ bic \xtmp2, \xtmp, SMCR_ELx_LEN_MASK
+ orr \xtmp2, \xtmp2, \xvqminus1
+ cmp \xtmp2, \xtmp
+ b.eq 921f
+ msr_s SYS_SMCR_EL1, \xtmp2 //self-synchronising
+921:
+.endm
+
+/* Preserve the first 128-bits of Znz and zero the rest. */
+.macro _sve_flush_z nz
+ _sve_check_zreg \nz
+ mov v\nz\().16b, v\nz\().16b
+.endm
+
+.macro sve_flush_z
+ _for n, 0, 31, _sve_flush_z \n
+.endm
+.macro sve_flush_p
+ _for n, 0, 15, _sve_pfalse \n
+.endm
+.macro sve_flush_ffr
+ _sve_wrffr 0
+.endm
+
+.macro sve_save nxbase, xpfpsr, save_ffr, nxtmp
+ _for n, 0, 31, _sve_str_v \n, \nxbase, \n - 34
+ _for n, 0, 15, _sve_str_p \n, \nxbase, \n - 16
+ cbz \save_ffr, 921f
+ _sve_rdffr 0
+ b 922f
+921:
+ _sve_pfalse 0 // Zero out FFR
+922:
+ _sve_str_p 0, \nxbase
+ _sve_ldr_p 0, \nxbase, -16
+ mrs x\nxtmp, fpsr
+ str w\nxtmp, [\xpfpsr]
+ mrs x\nxtmp, fpcr
+ str w\nxtmp, [\xpfpsr, #4]
+.endm
+
+.macro sve_load nxbase, xpfpsr, restore_ffr, nxtmp
+ _for n, 0, 31, _sve_ldr_v \n, \nxbase, \n - 34
+ cbz \restore_ffr, 921f
+ _sve_ldr_p 0, \nxbase
+ _sve_wrffr 0
+921:
+ _for n, 0, 15, _sve_ldr_p \n, \nxbase, \n - 16
+
+ ldr w\nxtmp, [\xpfpsr]
+ msr fpsr, x\nxtmp
+ ldr w\nxtmp, [\xpfpsr, #4]
+ msr fpcr, x\nxtmp
+.endm
+
+.macro sme_save_za nxbase, xvl, nw
+ mov w\nw, #0
+
+423:
+ _sme_str_zav \nw, \nxbase
+ add x\nxbase, x\nxbase, \xvl
+ add x\nw, x\nw, #1
+ cmp \xvl, x\nw
+ bne 423b
+.endm
+
+.macro sme_load_za nxbase, xvl, nw
+ mov w\nw, #0
+
+423:
+ _sme_ldr_zav \nw, \nxbase
+ add x\nxbase, x\nxbase, \xvl
+ add x\nw, x\nw, #1
+ cmp \xvl, x\nw
+ bne 423b
.endm