summaryrefslogtreecommitdiff
path: root/kernel/rseq.c
diff options
context:
space:
mode:
authorThomas Gleixner <tglx@linutronix.de>2025-10-27 09:44:57 +0100
committerIngo Molnar <mingo@kernel.org>2025-11-04 08:32:57 +0100
commitabc850e7616c91ebaa3f5ba3617ab0a104d45039 (patch)
tree9b303aaff8cd13af49ecdc0f834b2f91ed876ef9 /kernel/rseq.c
parent9c37cb6e80b8fcdddc1236ba42ffd438f511192b (diff)
rseq: Provide and use rseq_update_user_cs()
Provide a straight forward implementation to check for and eventually clear/fixup critical sections in user space. The non-debug version does only the minimal sanity checks and aims for efficiency. There are two attack vectors, which are checked for: 1) An abort IP which is in the kernel address space. That would cause at least x86 to return to kernel space via IRET. 2) A rogue critical section descriptor with an abort IP pointing to some arbitrary address, which is not preceded by the RSEQ signature. If the section descriptors are invalid then the resulting misbehaviour of the user space application is not the kernels problem. The kernel provides a run-time switchable debug slow path, which implements the full zoo of checks including termination of the task when one of the gazillion conditions is not met. Replace the zoo in rseq.c with it and invoke it from the TIF_NOTIFY_RESUME handler. Move the remainders into the CONFIG_DEBUG_RSEQ section, which will be replaced and removed in a subsequent step. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Signed-off-by: Ingo Molnar <mingo@kernel.org> Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> Link: https://patch.msgid.link/20251027084307.151465632@linutronix.de
Diffstat (limited to 'kernel/rseq.c')
-rw-r--r--kernel/rseq.c246
1 files changed, 75 insertions, 171 deletions
diff --git a/kernel/rseq.c b/kernel/rseq.c
index 679ab8ebdfd3..12a9b6ab2cef 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -382,175 +382,18 @@ efault:
return -EFAULT;
}
-/*
- * Get the user-space pointer value stored in the 'rseq_cs' field.
- */
-static int rseq_get_rseq_cs_ptr_val(struct rseq __user *rseq, u64 *rseq_cs)
-{
- if (!rseq_cs)
- return -EFAULT;
-
-#ifdef CONFIG_64BIT
- if (get_user(*rseq_cs, &rseq->rseq_cs))
- return -EFAULT;
-#else
- if (copy_from_user(rseq_cs, &rseq->rseq_cs, sizeof(*rseq_cs)))
- return -EFAULT;
-#endif
-
- return 0;
-}
-
-/*
- * If the rseq_cs field of 'struct rseq' contains a valid pointer to
- * user-space, copy 'struct rseq_cs' from user-space and validate its fields.
- */
-static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs)
-{
- struct rseq_cs __user *urseq_cs;
- u64 ptr;
- u32 __user *usig;
- u32 sig;
- int ret;
-
- ret = rseq_get_rseq_cs_ptr_val(t->rseq.usrptr, &ptr);
- if (ret)
- return ret;
-
- /* If the rseq_cs pointer is NULL, return a cleared struct rseq_cs. */
- if (!ptr) {
- memset(rseq_cs, 0, sizeof(*rseq_cs));
- return 0;
- }
- /* Check that the pointer value fits in the user-space process space. */
- if (ptr >= TASK_SIZE)
- return -EINVAL;
- urseq_cs = (struct rseq_cs __user *)(unsigned long)ptr;
- if (copy_from_user(rseq_cs, urseq_cs, sizeof(*rseq_cs)))
- return -EFAULT;
-
- if (rseq_cs->start_ip >= TASK_SIZE ||
- rseq_cs->start_ip + rseq_cs->post_commit_offset >= TASK_SIZE ||
- rseq_cs->abort_ip >= TASK_SIZE ||
- rseq_cs->version > 0)
- return -EINVAL;
- /* Check for overflow. */
- if (rseq_cs->start_ip + rseq_cs->post_commit_offset < rseq_cs->start_ip)
- return -EINVAL;
- /* Ensure that abort_ip is not in the critical section. */
- if (rseq_cs->abort_ip - rseq_cs->start_ip < rseq_cs->post_commit_offset)
- return -EINVAL;
-
- usig = (u32 __user *)(unsigned long)(rseq_cs->abort_ip - sizeof(u32));
- ret = get_user(sig, usig);
- if (ret)
- return ret;
-
- if (current->rseq.sig != sig) {
- printk_ratelimited(KERN_WARNING
- "Possible attack attempt. Unexpected rseq signature 0x%x, expecting 0x%x (pid=%d, addr=%p).\n",
- sig, current->rseq.sig, current->pid, usig);
- return -EINVAL;
- }
- return 0;
-}
-
-static bool rseq_warn_flags(const char *str, u32 flags)
-{
- u32 test_flags;
-
- if (!flags)
- return false;
- test_flags = flags & RSEQ_CS_NO_RESTART_FLAGS;
- if (test_flags)
- pr_warn_once("Deprecated flags (%u) in %s ABI structure", test_flags, str);
- test_flags = flags & ~RSEQ_CS_NO_RESTART_FLAGS;
- if (test_flags)
- pr_warn_once("Unknown flags (%u) in %s ABI structure", test_flags, str);
- return true;
-}
-
-static int rseq_check_flags(struct task_struct *t, u32 cs_flags)
-{
- u32 flags;
- int ret;
-
- if (rseq_warn_flags("rseq_cs", cs_flags))
- return -EINVAL;
-
- /* Get thread flags. */
- ret = get_user(flags, &t->rseq.usrptr->flags);
- if (ret)
- return ret;
-
- if (rseq_warn_flags("rseq", flags))
- return -EINVAL;
- return 0;
-}
-
-static int clear_rseq_cs(struct rseq __user *rseq)
-{
- /*
- * The rseq_cs field is set to NULL on preemption or signal
- * delivery on top of rseq assembly block, as well as on top
- * of code outside of the rseq assembly block. This performs
- * a lazy clear of the rseq_cs field.
- *
- * Set rseq_cs to NULL.
- */
-#ifdef CONFIG_64BIT
- return put_user(0UL, &rseq->rseq_cs);
-#else
- if (clear_user(&rseq->rseq_cs, sizeof(rseq->rseq_cs)))
- return -EFAULT;
- return 0;
-#endif
-}
-
-/*
- * Unsigned comparison will be true when ip >= start_ip, and when
- * ip < start_ip + post_commit_offset.
- */
-static bool in_rseq_cs(unsigned long ip, struct rseq_cs *rseq_cs)
-{
- return ip - rseq_cs->start_ip < rseq_cs->post_commit_offset;
-}
-
-static int rseq_ip_fixup(struct pt_regs *regs, bool abort)
+static bool rseq_handle_cs(struct task_struct *t, struct pt_regs *regs)
{
- unsigned long ip = instruction_pointer(regs);
- struct task_struct *t = current;
- struct rseq_cs rseq_cs;
- int ret;
-
- rseq_stat_inc(rseq_stats.cs);
-
- ret = rseq_get_rseq_cs(t, &rseq_cs);
- if (ret)
- return ret;
-
- /*
- * Handle potentially not being within a critical section.
- * If not nested over a rseq critical section, restart is useless.
- * Clear the rseq_cs pointer and return.
- */
- if (!in_rseq_cs(ip, &rseq_cs)) {
- rseq_stat_inc(rseq_stats.clear);
- return clear_rseq_cs(t->rseq.usrptr);
- }
- ret = rseq_check_flags(t, rseq_cs.flags);
- if (ret < 0)
- return ret;
- if (!abort)
- return 0;
- ret = clear_rseq_cs(t->rseq.usrptr);
- if (ret)
- return ret;
- rseq_stat_inc(rseq_stats.fixup);
- trace_rseq_ip_fixup(ip, rseq_cs.start_ip, rseq_cs.post_commit_offset,
- rseq_cs.abort_ip);
- instruction_pointer_set(regs, (unsigned long)rseq_cs.abort_ip);
- return 0;
+ struct rseq __user *urseq = t->rseq.usrptr;
+ u64 csaddr;
+
+ scoped_user_read_access(urseq, efault)
+ unsafe_get_user(csaddr, &urseq->rseq_cs, efault);
+ if (likely(!csaddr))
+ return true;
+ return rseq_update_user_cs(t, regs, csaddr);
+efault:
+ return false;
}
/*
@@ -567,8 +410,8 @@ static int rseq_ip_fixup(struct pt_regs *regs, bool abort)
void __rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs)
{
struct task_struct *t = current;
- int ret, sig;
bool event;
+ int sig;
/*
* If invoked from hypervisors before entering the guest via
@@ -618,8 +461,7 @@ void __rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs)
if (!IS_ENABLED(CONFIG_DEBUG_RSEQ) && !event)
return;
- ret = rseq_ip_fixup(regs, event);
- if (unlikely(ret < 0))
+ if (!rseq_handle_cs(t, regs))
goto error;
if (unlikely(rseq_update_cpu_node_id(t)))
@@ -632,6 +474,68 @@ error:
}
#ifdef CONFIG_DEBUG_RSEQ
+/*
+ * Unsigned comparison will be true when ip >= start_ip, and when
+ * ip < start_ip + post_commit_offset.
+ */
+static bool in_rseq_cs(unsigned long ip, struct rseq_cs *rseq_cs)
+{
+ return ip - rseq_cs->start_ip < rseq_cs->post_commit_offset;
+}
+
+/*
+ * If the rseq_cs field of 'struct rseq' contains a valid pointer to
+ * user-space, copy 'struct rseq_cs' from user-space and validate its fields.
+ */
+static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs)
+{
+ struct rseq __user *urseq = t->rseq.usrptr;
+ struct rseq_cs __user *urseq_cs;
+ u32 __user *usig;
+ u64 ptr;
+ u32 sig;
+ int ret;
+
+ if (get_user(ptr, &rseq->rseq_cs))
+ return -EFAULT;
+
+ /* If the rseq_cs pointer is NULL, return a cleared struct rseq_cs. */
+ if (!ptr) {
+ memset(rseq_cs, 0, sizeof(*rseq_cs));
+ return 0;
+ }
+ /* Check that the pointer value fits in the user-space process space. */
+ if (ptr >= TASK_SIZE)
+ return -EINVAL;
+ urseq_cs = (struct rseq_cs __user *)(unsigned long)ptr;
+ if (copy_from_user(rseq_cs, urseq_cs, sizeof(*rseq_cs)))
+ return -EFAULT;
+
+ if (rseq_cs->start_ip >= TASK_SIZE ||
+ rseq_cs->start_ip + rseq_cs->post_commit_offset >= TASK_SIZE ||
+ rseq_cs->abort_ip >= TASK_SIZE ||
+ rseq_cs->version > 0)
+ return -EINVAL;
+ /* Check for overflow. */
+ if (rseq_cs->start_ip + rseq_cs->post_commit_offset < rseq_cs->start_ip)
+ return -EINVAL;
+ /* Ensure that abort_ip is not in the critical section. */
+ if (rseq_cs->abort_ip - rseq_cs->start_ip < rseq_cs->post_commit_offset)
+ return -EINVAL;
+
+ usig = (u32 __user *)(unsigned long)(rseq_cs->abort_ip - sizeof(u32));
+ ret = get_user(sig, usig);
+ if (ret)
+ return ret;
+
+ if (current->rseq.sig != sig) {
+ printk_ratelimited(KERN_WARNING
+ "Possible attack attempt. Unexpected rseq signature 0x%x, expecting 0x%x (pid=%d, addr=%p).\n",
+ sig, current->rseq.sig, current->pid, usig);
+ return -EINVAL;
+ }
+ return 0;
+}
/*
* Terminate the process if a syscall is issued within a restartable