summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/arch/arm64/sme.rst4
-rw-r--r--arch/arm64/kernel/process.c92
2 files changed, 64 insertions, 32 deletions
diff --git a/Documentation/arch/arm64/sme.rst b/Documentation/arch/arm64/sme.rst
index 3a98aed92732..1c1e48d8bd1a 100644
--- a/Documentation/arch/arm64/sme.rst
+++ b/Documentation/arch/arm64/sme.rst
@@ -69,8 +69,8 @@ model features for SME is included in Appendix A.
vectors from 0 to VL/8-1 stored in the same endianness invariant format as is
used for SVE vectors.
-* On thread creation TPIDR2_EL0 is preserved unless CLONE_SETTLS is specified,
- in which case it is set to 0.
+* On thread creation PSTATE.ZA and TPIDR2_EL0 are preserved unless CLONE_VM
+ is specified, in which case PSTATE.ZA is set to 0 and TPIDR2_EL0 is set to 0.
2. Vector lengths
------------------
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 27a5b0c7ec60..74bee78cc3fa 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -364,31 +364,14 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
task_smstop_sm(dst);
/*
- * In the unlikely event that we create a new thread with ZA
- * enabled we should retain the ZA and ZT state so duplicate
- * it here. This may be shortly freed if we exec() or if
- * CLONE_SETTLS but it's simpler to do it here. To avoid
- * confusing the rest of the code ensure that we have a
- * sve_state allocated whenever sme_state is allocated.
+ * Drop stale reference to src's sme_state and ensure dst has ZA
+ * disabled.
+ *
+ * When necessary, ZA will be inherited later in copy_thread_za().
*/
- if (thread_za_enabled(&src->thread)) {
- dst->thread.sve_state = kzalloc(sve_state_size(src),
- GFP_KERNEL);
- if (!dst->thread.sve_state)
- return -ENOMEM;
-
- dst->thread.sme_state = kmemdup(src->thread.sme_state,
- sme_state_size(src),
- GFP_KERNEL);
- if (!dst->thread.sme_state) {
- kfree(dst->thread.sve_state);
- dst->thread.sve_state = NULL;
- return -ENOMEM;
- }
- } else {
- dst->thread.sme_state = NULL;
- clear_tsk_thread_flag(dst, TIF_SME);
- }
+ dst->thread.sme_state = NULL;
+ clear_tsk_thread_flag(dst, TIF_SME);
+ dst->thread.svcr &= ~SVCR_ZA_MASK;
/* clear any pending asynchronous tag fault raised by the parent */
clear_tsk_thread_flag(dst, TIF_MTE_ASYNC_FAULT);
@@ -396,6 +379,31 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
return 0;
}
+static int copy_thread_za(struct task_struct *dst, struct task_struct *src)
+{
+ if (!thread_za_enabled(&src->thread))
+ return 0;
+
+ dst->thread.sve_state = kzalloc(sve_state_size(src),
+ GFP_KERNEL);
+ if (!dst->thread.sve_state)
+ return -ENOMEM;
+
+ dst->thread.sme_state = kmemdup(src->thread.sme_state,
+ sme_state_size(src),
+ GFP_KERNEL);
+ if (!dst->thread.sme_state) {
+ kfree(dst->thread.sve_state);
+ dst->thread.sve_state = NULL;
+ return -ENOMEM;
+ }
+
+ set_tsk_thread_flag(dst, TIF_SME);
+ dst->thread.svcr |= SVCR_ZA_MASK;
+
+ return 0;
+}
+
asmlinkage void ret_from_fork(void) asm("ret_from_fork");
int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
@@ -428,8 +436,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
* out-of-sync with the saved value.
*/
*task_user_tls(p) = read_sysreg(tpidr_el0);
- if (system_supports_tpidr2())
- p->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0);
if (system_supports_poe())
p->thread.por_el0 = read_sysreg_s(SYS_POR_EL0);
@@ -442,13 +448,39 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
}
/*
+ * Due to the AAPCS64 "ZA lazy saving scheme", PSTATE.ZA and
+ * TPIDR2 need to be manipulated as a pair, and either both
+ * need to be inherited or both need to be reset.
+ *
+ * Within a process, child threads must not inherit their
+ * parent's TPIDR2 value or they may clobber their parent's
+ * stack at some later point.
+ *
+ * When a process is fork()'d, the child must inherit ZA and
+ * TPIDR2 from its parent in case there was dormant ZA state.
+ *
+ * Use CLONE_VM to determine when the child will share the
+ * address space with the parent, and cannot safely inherit the
+ * state.
+ */
+ if (system_supports_sme()) {
+ if (!(clone_flags & CLONE_VM)) {
+ p->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0);
+ ret = copy_thread_za(p, current);
+ if (ret)
+ return ret;
+ } else {
+ p->thread.tpidr2_el0 = 0;
+ WARN_ON_ONCE(p->thread.svcr & SVCR_ZA_MASK);
+ }
+ }
+
+ /*
* If a TLS pointer was passed to clone, use it for the new
- * thread. We also reset TPIDR2 if it's in use.
+ * thread.
*/
- if (clone_flags & CLONE_SETTLS) {
+ if (clone_flags & CLONE_SETTLS)
p->thread.uw.tp_value = tls;
- p->thread.tpidr2_el0 = 0;
- }
ret = copy_thread_gcs(p, args);
if (ret != 0)