summaryrefslogtreecommitdiff
path: root/kernel/rseq.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/rseq.c')
-rw-r--r--kernel/rseq.c249
1 files changed, 210 insertions, 39 deletions
diff --git a/kernel/rseq.c b/kernel/rseq.c
index a4f86a9d6937..2cb16091ec0a 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -13,13 +13,90 @@
#include <linux/syscalls.h>
#include <linux/rseq.h>
#include <linux/types.h>
+#include <linux/ratelimit.h>
#include <asm/ptrace.h>
#define CREATE_TRACE_POINTS
#include <trace/events/rseq.h>
-#define RSEQ_CS_PREEMPT_MIGRATE_FLAGS (RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE | \
- RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT)
+/* The original rseq structure size (including padding) is 32 bytes. */
+#define ORIG_RSEQ_SIZE 32
+
+#define RSEQ_CS_NO_RESTART_FLAGS (RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT | \
+ RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL | \
+ RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE)
+
+#ifdef CONFIG_DEBUG_RSEQ
+static struct rseq *rseq_kernel_fields(struct task_struct *t)
+{
+ return (struct rseq *) t->rseq_fields;
+}
+
+static int rseq_validate_ro_fields(struct task_struct *t)
+{
+ static DEFINE_RATELIMIT_STATE(_rs,
+ DEFAULT_RATELIMIT_INTERVAL,
+ DEFAULT_RATELIMIT_BURST);
+ u32 cpu_id_start, cpu_id, node_id, mm_cid;
+ struct rseq __user *rseq = t->rseq;
+
+ /*
+ * Validate fields which are required to be read-only by
+ * user-space.
+ */
+ if (!user_read_access_begin(rseq, t->rseq_len))
+ goto efault;
+ unsafe_get_user(cpu_id_start, &rseq->cpu_id_start, efault_end);
+ unsafe_get_user(cpu_id, &rseq->cpu_id, efault_end);
+ unsafe_get_user(node_id, &rseq->node_id, efault_end);
+ unsafe_get_user(mm_cid, &rseq->mm_cid, efault_end);
+ user_read_access_end();
+
+ if ((cpu_id_start != rseq_kernel_fields(t)->cpu_id_start ||
+ cpu_id != rseq_kernel_fields(t)->cpu_id ||
+ node_id != rseq_kernel_fields(t)->node_id ||
+ mm_cid != rseq_kernel_fields(t)->mm_cid) && __ratelimit(&_rs)) {
+
+ pr_warn("Detected rseq corruption for pid: %d, name: %s\n"
+ "\tcpu_id_start: %u ?= %u\n"
+ "\tcpu_id: %u ?= %u\n"
+ "\tnode_id: %u ?= %u\n"
+ "\tmm_cid: %u ?= %u\n",
+ t->pid, t->comm,
+ cpu_id_start, rseq_kernel_fields(t)->cpu_id_start,
+ cpu_id, rseq_kernel_fields(t)->cpu_id,
+ node_id, rseq_kernel_fields(t)->node_id,
+ mm_cid, rseq_kernel_fields(t)->mm_cid);
+ }
+
+ /* For now, only print a console warning on mismatch. */
+ return 0;
+
+efault_end:
+ user_read_access_end();
+efault:
+ return -EFAULT;
+}
+
+static void rseq_set_ro_fields(struct task_struct *t, u32 cpu_id_start, u32 cpu_id,
+ u32 node_id, u32 mm_cid)
+{
+ rseq_kernel_fields(t)->cpu_id_start = cpu_id;
+ rseq_kernel_fields(t)->cpu_id = cpu_id;
+ rseq_kernel_fields(t)->node_id = node_id;
+ rseq_kernel_fields(t)->mm_cid = mm_cid;
+}
+#else
+static int rseq_validate_ro_fields(struct task_struct *t)
+{
+ return 0;
+}
+
+static void rseq_set_ro_fields(struct task_struct *t, u32 cpu_id_start, u32 cpu_id,
+ u32 node_id, u32 mm_cid)
+{
+}
+#endif
/*
*
@@ -81,23 +158,52 @@
* F1. <failure>
*/
-static int rseq_update_cpu_id(struct task_struct *t)
+static int rseq_update_cpu_node_id(struct task_struct *t)
{
+ struct rseq __user *rseq = t->rseq;
u32 cpu_id = raw_smp_processor_id();
+ u32 node_id = cpu_to_node(cpu_id);
+ u32 mm_cid = task_mm_cid(t);
- if (put_user(cpu_id, &t->rseq->cpu_id_start))
- return -EFAULT;
- if (put_user(cpu_id, &t->rseq->cpu_id))
- return -EFAULT;
+ /*
+ * Validate read-only rseq fields.
+ */
+ if (rseq_validate_ro_fields(t))
+ goto efault;
+ WARN_ON_ONCE((int) mm_cid < 0);
+ if (!user_write_access_begin(rseq, t->rseq_len))
+ goto efault;
+ unsafe_put_user(cpu_id, &rseq->cpu_id_start, efault_end);
+ unsafe_put_user(cpu_id, &rseq->cpu_id, efault_end);
+ unsafe_put_user(node_id, &rseq->node_id, efault_end);
+ unsafe_put_user(mm_cid, &rseq->mm_cid, efault_end);
+ /*
+ * Additional feature fields added after ORIG_RSEQ_SIZE
+ * need to be conditionally updated only if
+ * t->rseq_len != ORIG_RSEQ_SIZE.
+ */
+ user_write_access_end();
+ rseq_set_ro_fields(t, cpu_id, cpu_id, node_id, mm_cid);
trace_rseq_update(t);
return 0;
+
+efault_end:
+ user_write_access_end();
+efault:
+ return -EFAULT;
}
-static int rseq_reset_rseq_cpu_id(struct task_struct *t)
+static int rseq_reset_rseq_cpu_node_id(struct task_struct *t)
{
- u32 cpu_id_start = 0, cpu_id = RSEQ_CPU_ID_UNINITIALIZED;
+ u32 cpu_id_start = 0, cpu_id = RSEQ_CPU_ID_UNINITIALIZED, node_id = 0,
+ mm_cid = 0;
/*
+ * Validate read-only rseq fields.
+ */
+ if (rseq_validate_ro_fields(t))
+ return -EFAULT;
+ /*
* Reset cpu_id_start to its initial state (0).
*/
if (put_user(cpu_id_start, &t->rseq->cpu_id_start))
@@ -109,6 +215,24 @@ static int rseq_reset_rseq_cpu_id(struct task_struct *t)
*/
if (put_user(cpu_id, &t->rseq->cpu_id))
return -EFAULT;
+ /*
+ * Reset node_id to its initial state (0).
+ */
+ if (put_user(node_id, &t->rseq->node_id))
+ return -EFAULT;
+ /*
+ * Reset mm_cid to its initial state (0).
+ */
+ if (put_user(mm_cid, &t->rseq->mm_cid))
+ return -EFAULT;
+
+ rseq_set_ro_fields(t, cpu_id_start, cpu_id, node_id, mm_cid);
+
+ /*
+ * Additional feature fields added after ORIG_RSEQ_SIZE
+ * need to be conditionally reset only if
+ * t->rseq_len != ORIG_RSEQ_SIZE.
+ */
return 0;
}
@@ -120,8 +244,13 @@ static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs)
u32 sig;
int ret;
- if (copy_from_user(&ptr, &t->rseq->rseq_cs.ptr64, sizeof(ptr)))
+#ifdef CONFIG_64BIT
+ if (get_user(ptr, &t->rseq->rseq_cs))
+ return -EFAULT;
+#else
+ if (copy_from_user(&ptr, &t->rseq->rseq_cs, sizeof(ptr)))
return -EFAULT;
+#endif
if (!ptr) {
memset(rseq_cs, 0, sizeof(*rseq_cs));
return 0;
@@ -158,28 +287,35 @@ static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs)
return 0;
}
+static bool rseq_warn_flags(const char *str, u32 flags)
+{
+ u32 test_flags;
+
+ if (!flags)
+ return false;
+ test_flags = flags & RSEQ_CS_NO_RESTART_FLAGS;
+ if (test_flags)
+ pr_warn_once("Deprecated flags (%u) in %s ABI structure", test_flags, str);
+ test_flags = flags & ~RSEQ_CS_NO_RESTART_FLAGS;
+ if (test_flags)
+ pr_warn_once("Unknown flags (%u) in %s ABI structure", test_flags, str);
+ return true;
+}
+
static int rseq_need_restart(struct task_struct *t, u32 cs_flags)
{
u32 flags, event_mask;
int ret;
+ if (rseq_warn_flags("rseq_cs", cs_flags))
+ return -EINVAL;
+
/* Get thread flags. */
ret = get_user(flags, &t->rseq->flags);
if (ret)
return ret;
- /* Take critical section flags into account. */
- flags |= cs_flags;
-
- /*
- * Restart on signal can only be inhibited when restart on
- * preempt and restart on migrate are inhibited too. Otherwise,
- * a preempted signal handler could fail to restart the prior
- * execution context on sigreturn.
- */
- if (unlikely((flags & RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL) &&
- (flags & RSEQ_CS_PREEMPT_MIGRATE_FLAGS) !=
- RSEQ_CS_PREEMPT_MIGRATE_FLAGS))
+ if (rseq_warn_flags("rseq", flags))
return -EINVAL;
/*
@@ -191,7 +327,7 @@ static int rseq_need_restart(struct task_struct *t, u32 cs_flags)
t->rseq_event_mask = 0;
preempt_enable();
- return !!(event_mask & ~flags);
+ return !!event_mask;
}
static int clear_rseq_cs(struct task_struct *t)
@@ -204,9 +340,13 @@ static int clear_rseq_cs(struct task_struct *t)
*
* Set rseq_cs to NULL.
*/
- if (clear_user(&t->rseq->rseq_cs.ptr64, sizeof(t->rseq->rseq_cs.ptr64)))
+#ifdef CONFIG_64BIT
+ return put_user(0UL, &t->rseq->rseq_cs);
+#else
+ if (clear_user(&t->rseq->rseq_cs, sizeof(t->rseq->rseq_cs)))
return -EFAULT;
return 0;
+#endif
}
/*
@@ -266,12 +406,18 @@ void __rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs)
if (unlikely(t->flags & PF_EXITING))
return;
- if (unlikely(!access_ok(t->rseq, sizeof(*t->rseq))))
- goto error;
- ret = rseq_ip_fixup(regs);
- if (unlikely(ret < 0))
- goto error;
- if (unlikely(rseq_update_cpu_id(t)))
+
+ /*
+ * regs is NULL if and only if the caller is in a syscall path. Skip
+ * fixup and leave rseq_cs as is so that rseq_sycall() will detect and
+ * kill a misbehaving userspace on debug kernels.
+ */
+ if (regs) {
+ ret = rseq_ip_fixup(regs);
+ if (unlikely(ret < 0))
+ goto error;
+ }
+ if (unlikely(rseq_update_cpu_node_id(t)))
goto error;
return;
@@ -294,8 +440,7 @@ void rseq_syscall(struct pt_regs *regs)
if (!t->rseq)
return;
- if (!access_ok(t->rseq, sizeof(*t->rseq)) ||
- rseq_get_rseq_cs(t, &rseq_cs) || in_rseq_cs(ip, &rseq_cs))
+ if (rseq_get_rseq_cs(t, &rseq_cs) || in_rseq_cs(ip, &rseq_cs))
force_sig(SIGSEGV);
}
@@ -315,15 +460,16 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len,
/* Unregister rseq for current thread. */
if (current->rseq != rseq || !current->rseq)
return -EINVAL;
- if (rseq_len != sizeof(*rseq))
+ if (rseq_len != current->rseq_len)
return -EINVAL;
if (current->rseq_sig != sig)
return -EPERM;
- ret = rseq_reset_rseq_cpu_id(current);
+ ret = rseq_reset_rseq_cpu_node_id(current);
if (ret)
return ret;
current->rseq = NULL;
current->rseq_sig = 0;
+ current->rseq_len = 0;
return 0;
}
@@ -336,7 +482,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len,
* the provided address differs from the prior
* one.
*/
- if (current->rseq != rseq || rseq_len != sizeof(*rseq))
+ if (current->rseq != rseq || rseq_len != current->rseq_len)
return -EINVAL;
if (current->rseq_sig != sig)
return -EPERM;
@@ -345,16 +491,41 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len,
}
/*
- * If there was no rseq previously registered,
- * ensure the provided rseq is properly aligned and valid.
+ * If there was no rseq previously registered, ensure the provided rseq
+ * is properly aligned, as communcated to user-space through the ELF
+ * auxiliary vector AT_RSEQ_ALIGN. If rseq_len is the original rseq
+ * size, the required alignment is the original struct rseq alignment.
+ *
+ * In order to be valid, rseq_len is either the original rseq size, or
+ * large enough to contain all supported fields, as communicated to
+ * user-space through the ELF auxiliary vector AT_RSEQ_FEATURE_SIZE.
*/
- if (!IS_ALIGNED((unsigned long)rseq, __alignof__(*rseq)) ||
- rseq_len != sizeof(*rseq))
+ if (rseq_len < ORIG_RSEQ_SIZE ||
+ (rseq_len == ORIG_RSEQ_SIZE && !IS_ALIGNED((unsigned long)rseq, ORIG_RSEQ_SIZE)) ||
+ (rseq_len != ORIG_RSEQ_SIZE && (!IS_ALIGNED((unsigned long)rseq, __alignof__(*rseq)) ||
+ rseq_len < offsetof(struct rseq, end))))
return -EINVAL;
if (!access_ok(rseq, rseq_len))
return -EFAULT;
+#ifdef CONFIG_DEBUG_RSEQ
+ /*
+ * Initialize the in-kernel rseq fields copy for validation of
+ * read-only fields.
+ */
+ if (get_user(rseq_kernel_fields(current)->cpu_id_start, &rseq->cpu_id_start) ||
+ get_user(rseq_kernel_fields(current)->cpu_id, &rseq->cpu_id) ||
+ get_user(rseq_kernel_fields(current)->node_id, &rseq->node_id) ||
+ get_user(rseq_kernel_fields(current)->mm_cid, &rseq->mm_cid))
+ return -EFAULT;
+#endif
+ /*
+ * Activate the registration by setting the rseq area address, length
+ * and signature in the task struct.
+ */
current->rseq = rseq;
+ current->rseq_len = rseq_len;
current->rseq_sig = sig;
+
/*
* If rseq was previously inactive, and has just been
* registered, ensure the cpu_id_start and cpu_id fields