Diffstat (limited to 'kernel/rseq.c')
-rw-r--r--  kernel/rseq.c | 249
1 file changed, 210 insertions(+), 39 deletions(-)
diff --git a/kernel/rseq.c b/kernel/rseq.c
index a4f86a9d6937..2cb16091ec0a 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -13,13 +13,90 @@
 #include <linux/syscalls.h>
 #include <linux/rseq.h>
 #include <linux/types.h>
+#include <linux/ratelimit.h>
 #include <asm/ptrace.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/rseq.h>
 
-#define RSEQ_CS_PREEMPT_MIGRATE_FLAGS (RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE | \
-				       RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT)
+/* The original rseq structure size (including padding) is 32 bytes. */
+#define ORIG_RSEQ_SIZE		32
+
+#define RSEQ_CS_NO_RESTART_FLAGS (RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT | \
+				  RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL | \
+				  RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE)
+
+#ifdef CONFIG_DEBUG_RSEQ
+static struct rseq *rseq_kernel_fields(struct task_struct *t)
+{
+	return (struct rseq *) t->rseq_fields;
+}
+
+static int rseq_validate_ro_fields(struct task_struct *t)
+{
+	static DEFINE_RATELIMIT_STATE(_rs,
+				      DEFAULT_RATELIMIT_INTERVAL,
+				      DEFAULT_RATELIMIT_BURST);
+	u32 cpu_id_start, cpu_id, node_id, mm_cid;
+	struct rseq __user *rseq = t->rseq;
+
+	/*
+	 * Validate fields which are required to be read-only by
+	 * user-space.
+	 */
+	if (!user_read_access_begin(rseq, t->rseq_len))
+		goto efault;
+	unsafe_get_user(cpu_id_start, &rseq->cpu_id_start, efault_end);
+	unsafe_get_user(cpu_id, &rseq->cpu_id, efault_end);
+	unsafe_get_user(node_id, &rseq->node_id, efault_end);
+	unsafe_get_user(mm_cid, &rseq->mm_cid, efault_end);
+	user_read_access_end();
+
+	if ((cpu_id_start != rseq_kernel_fields(t)->cpu_id_start ||
+	     cpu_id != rseq_kernel_fields(t)->cpu_id ||
+	     node_id != rseq_kernel_fields(t)->node_id ||
+	     mm_cid != rseq_kernel_fields(t)->mm_cid) && __ratelimit(&_rs)) {
+
+		pr_warn("Detected rseq corruption for pid: %d, name: %s\n"
+			"\tcpu_id_start: %u ?= %u\n"
+			"\tcpu_id: %u ?= %u\n"
+			"\tnode_id: %u ?= %u\n"
+			"\tmm_cid: %u ?= %u\n",
+			t->pid, t->comm,
+			cpu_id_start, rseq_kernel_fields(t)->cpu_id_start,
+			cpu_id, rseq_kernel_fields(t)->cpu_id,
+			node_id, rseq_kernel_fields(t)->node_id,
+			mm_cid, rseq_kernel_fields(t)->mm_cid);
+	}
+
+	/* For now, only print a console warning on mismatch. */
+	return 0;
+
+efault_end:
+	user_read_access_end();
+efault:
+	return -EFAULT;
+}
+
+static void rseq_set_ro_fields(struct task_struct *t, u32 cpu_id_start, u32 cpu_id,
+			       u32 node_id, u32 mm_cid)
+{
+	rseq_kernel_fields(t)->cpu_id_start = cpu_id;
+	rseq_kernel_fields(t)->cpu_id = cpu_id;
+	rseq_kernel_fields(t)->node_id = node_id;
+	rseq_kernel_fields(t)->mm_cid = mm_cid;
+}
+#else
+static int rseq_validate_ro_fields(struct task_struct *t)
+{
+	return 0;
+}
+
+static void rseq_set_ro_fields(struct task_struct *t, u32 cpu_id_start, u32 cpu_id,
+			       u32 node_id, u32 mm_cid)
+{
+}
+#endif
 
 /*
  *
@@ -81,23 +158,52 @@
  * F1. <failure>
  */
 
-static int rseq_update_cpu_id(struct task_struct *t)
+static int rseq_update_cpu_node_id(struct task_struct *t)
 {
+	struct rseq __user *rseq = t->rseq;
 	u32 cpu_id = raw_smp_processor_id();
+	u32 node_id = cpu_to_node(cpu_id);
+	u32 mm_cid = task_mm_cid(t);
 
-	if (put_user(cpu_id, &t->rseq->cpu_id_start))
-		return -EFAULT;
-	if (put_user(cpu_id, &t->rseq->cpu_id))
-		return -EFAULT;
+	/*
+	 * Validate read-only rseq fields.
+	 */
+	if (rseq_validate_ro_fields(t))
+		goto efault;
+	WARN_ON_ONCE((int) mm_cid < 0);
+	if (!user_write_access_begin(rseq, t->rseq_len))
+		goto efault;
+	unsafe_put_user(cpu_id, &rseq->cpu_id_start, efault_end);
+	unsafe_put_user(cpu_id, &rseq->cpu_id, efault_end);
+	unsafe_put_user(node_id, &rseq->node_id, efault_end);
+	unsafe_put_user(mm_cid, &rseq->mm_cid, efault_end);
+	/*
+	 * Additional feature fields added after ORIG_RSEQ_SIZE
+	 * need to be conditionally updated only if
+	 * t->rseq_len != ORIG_RSEQ_SIZE.
+	 */
+	user_write_access_end();
+	rseq_set_ro_fields(t, cpu_id, cpu_id, node_id, mm_cid);
 	trace_rseq_update(t);
 	return 0;
+
+efault_end:
+	user_write_access_end();
+efault:
+	return -EFAULT;
 }
 
-static int rseq_reset_rseq_cpu_id(struct task_struct *t)
+static int rseq_reset_rseq_cpu_node_id(struct task_struct *t)
 {
-	u32 cpu_id_start = 0, cpu_id = RSEQ_CPU_ID_UNINITIALIZED;
+	u32 cpu_id_start = 0, cpu_id = RSEQ_CPU_ID_UNINITIALIZED, node_id = 0,
+	    mm_cid = 0;
 
 	/*
+	 * Validate read-only rseq fields.
+	 */
+	if (rseq_validate_ro_fields(t))
+		return -EFAULT;
+	/*
 	 * Reset cpu_id_start to its initial state (0).
 	 */
 	if (put_user(cpu_id_start, &t->rseq->cpu_id_start))
@@ -109,6 +215,24 @@ static int rseq_reset_rseq_cpu_id(struct task_struct *t)
 	 */
 	if (put_user(cpu_id, &t->rseq->cpu_id))
 		return -EFAULT;
+	/*
+	 * Reset node_id to its initial state (0).
+	 */
+	if (put_user(node_id, &t->rseq->node_id))
+		return -EFAULT;
+	/*
+	 * Reset mm_cid to its initial state (0).
+	 */
+	if (put_user(mm_cid, &t->rseq->mm_cid))
+		return -EFAULT;
+
+	rseq_set_ro_fields(t, cpu_id_start, cpu_id, node_id, mm_cid);
+
+	/*
+	 * Additional feature fields added after ORIG_RSEQ_SIZE
+	 * need to be conditionally reset only if
+	 * t->rseq_len != ORIG_RSEQ_SIZE.
+	 */
 	return 0;
 }
 
@@ -120,8 +244,13 @@ static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs)
 	u32 sig;
 	int ret;
 
-	if (copy_from_user(&ptr, &t->rseq->rseq_cs.ptr64, sizeof(ptr)))
+#ifdef CONFIG_64BIT
+	if (get_user(ptr, &t->rseq->rseq_cs))
+		return -EFAULT;
+#else
+	if (copy_from_user(&ptr, &t->rseq->rseq_cs, sizeof(ptr)))
 		return -EFAULT;
+#endif
 	if (!ptr) {
 		memset(rseq_cs, 0, sizeof(*rseq_cs));
 		return 0;
@@ -158,28 +287,35 @@ static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs)
 	return 0;
 }
 
+static bool rseq_warn_flags(const char *str, u32 flags)
+{
+	u32 test_flags;
+
+	if (!flags)
+		return false;
+	test_flags = flags & RSEQ_CS_NO_RESTART_FLAGS;
+	if (test_flags)
+		pr_warn_once("Deprecated flags (%u) in %s ABI structure", test_flags, str);
+	test_flags = flags & ~RSEQ_CS_NO_RESTART_FLAGS;
+	if (test_flags)
+		pr_warn_once("Unknown flags (%u) in %s ABI structure", test_flags, str);
+	return true;
+}
+
 static int rseq_need_restart(struct task_struct *t, u32 cs_flags)
 {
 	u32 flags, event_mask;
 	int ret;
 
+	if (rseq_warn_flags("rseq_cs", cs_flags))
+		return -EINVAL;
+
 	/* Get thread flags. */
 	ret = get_user(flags, &t->rseq->flags);
 	if (ret)
 		return ret;
 
-	/* Take critical section flags into account. */
-	flags |= cs_flags;
-
-	/*
-	 * Restart on signal can only be inhibited when restart on
-	 * preempt and restart on migrate are inhibited too. Otherwise,
-	 * a preempted signal handler could fail to restart the prior
-	 * execution context on sigreturn.
-	 */
-	if (unlikely((flags & RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL) &&
-		     (flags & RSEQ_CS_PREEMPT_MIGRATE_FLAGS) !=
-		     RSEQ_CS_PREEMPT_MIGRATE_FLAGS))
+	if (rseq_warn_flags("rseq", flags))
 		return -EINVAL;
 
 	/*
@@ -191,7 +327,7 @@ static int rseq_need_restart(struct task_struct *t, u32 cs_flags)
 	t->rseq_event_mask = 0;
 	preempt_enable();
 
-	return !!(event_mask & ~flags);
+	return !!event_mask;
 }
 
 static int clear_rseq_cs(struct task_struct *t)
@@ -204,9 +340,13 @@ static int clear_rseq_cs(struct task_struct *t)
 	 *
 	 * Set rseq_cs to NULL.
 	 */
-	if (clear_user(&t->rseq->rseq_cs.ptr64, sizeof(t->rseq->rseq_cs.ptr64)))
+#ifdef CONFIG_64BIT
+	return put_user(0UL, &t->rseq->rseq_cs);
+#else
+	if (clear_user(&t->rseq->rseq_cs, sizeof(t->rseq->rseq_cs)))
 		return -EFAULT;
 	return 0;
+#endif
 }
 
 /*
@@ -266,12 +406,18 @@ void __rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs)
 	if (unlikely(t->flags & PF_EXITING))
 		return;
 
-	if (unlikely(!access_ok(t->rseq, sizeof(*t->rseq))))
-		goto error;
-	ret = rseq_ip_fixup(regs);
-	if (unlikely(ret < 0))
-		goto error;
-	if (unlikely(rseq_update_cpu_id(t)))
+
+	/*
+	 * regs is NULL if and only if the caller is in a syscall path. Skip
+	 * fixup and leave rseq_cs as is so that rseq_syscall() will detect and
+	 * kill a misbehaving userspace on debug kernels.
+	 */
+	if (regs) {
+		ret = rseq_ip_fixup(regs);
+		if (unlikely(ret < 0))
+			goto error;
+	}
+	if (unlikely(rseq_update_cpu_node_id(t)))
 		goto error;
 	return;
 
@@ -294,8 +440,7 @@ void rseq_syscall(struct pt_regs *regs)
 	if (!t->rseq)
 		return;
 
-	if (!access_ok(t->rseq, sizeof(*t->rseq)) ||
-	    rseq_get_rseq_cs(t, &rseq_cs) || in_rseq_cs(ip, &rseq_cs))
+	if (rseq_get_rseq_cs(t, &rseq_cs) || in_rseq_cs(ip, &rseq_cs))
 		force_sig(SIGSEGV);
 }
 
@@ -315,15 +460,16 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len,
 		/* Unregister rseq for current thread. */
 		if (current->rseq != rseq || !current->rseq)
 			return -EINVAL;
-		if (rseq_len != sizeof(*rseq))
+		if (rseq_len != current->rseq_len)
 			return -EINVAL;
 		if (current->rseq_sig != sig)
 			return -EPERM;
-		ret = rseq_reset_rseq_cpu_id(current);
+		ret = rseq_reset_rseq_cpu_node_id(current);
 		if (ret)
 			return ret;
 		current->rseq = NULL;
 		current->rseq_sig = 0;
+		current->rseq_len = 0;
 		return 0;
 	}
 
@@ -336,7 +482,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len,
 		 * the provided address differs from the prior
 		 * one.
 		 */
-		if (current->rseq != rseq || rseq_len != sizeof(*rseq))
+		if (current->rseq != rseq || rseq_len != current->rseq_len)
 			return -EINVAL;
 		if (current->rseq_sig != sig)
 			return -EPERM;
@@ -345,16 +491,41 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len,
 	}
 
 	/*
-	 * If there was no rseq previously registered,
-	 * ensure the provided rseq is properly aligned and valid.
+	 * If there was no rseq previously registered, ensure the provided rseq
+	 * is properly aligned, as communicated to user-space through the ELF
+	 * auxiliary vector AT_RSEQ_ALIGN. If rseq_len is the original rseq
+	 * size, the required alignment is the original struct rseq alignment.
+	 *
+	 * In order to be valid, rseq_len is either the original rseq size, or
+	 * large enough to contain all supported fields, as communicated to
+	 * user-space through the ELF auxiliary vector AT_RSEQ_FEATURE_SIZE.
 	 */
-	if (!IS_ALIGNED((unsigned long)rseq, __alignof__(*rseq)) ||
-	    rseq_len != sizeof(*rseq))
+	if (rseq_len < ORIG_RSEQ_SIZE ||
+	    (rseq_len == ORIG_RSEQ_SIZE && !IS_ALIGNED((unsigned long)rseq, ORIG_RSEQ_SIZE)) ||
+	    (rseq_len != ORIG_RSEQ_SIZE && (!IS_ALIGNED((unsigned long)rseq, __alignof__(*rseq)) ||
+					    rseq_len < offsetof(struct rseq, end))))
 		return -EINVAL;
 	if (!access_ok(rseq, rseq_len))
 		return -EFAULT;
+#ifdef CONFIG_DEBUG_RSEQ
+	/*
+	 * Initialize the in-kernel rseq fields copy for validation of
+	 * read-only fields.
+	 */
+	if (get_user(rseq_kernel_fields(current)->cpu_id_start, &rseq->cpu_id_start) ||
+	    get_user(rseq_kernel_fields(current)->cpu_id, &rseq->cpu_id) ||
+	    get_user(rseq_kernel_fields(current)->node_id, &rseq->node_id) ||
+	    get_user(rseq_kernel_fields(current)->mm_cid, &rseq->mm_cid))
+		return -EFAULT;
+#endif
+	/*
+	 * Activate the registration by setting the rseq area address, length
+	 * and signature in the task struct.
+	 */
 	current->rseq = rseq;
+	current->rseq_len = rseq_len;
 	current->rseq_sig = sig;
+
 	/*
	 * If rseq was previously inactive, and has just been
	 * registered, ensure the cpu_id_start and cpu_id fields
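
For context, a user-space view of the registration rules enforced above: the area must be at least ORIG_RSEQ_SIZE (32) bytes, aligned as reported by the AT_RSEQ_ALIGN ELF auxiliary vector entry, and any extended length must cover every supported field up to offsetof(struct rseq, end). The following is a minimal registration sketch, not part of the patch: it assumes UAPI headers new enough to declare node_id/mm_cid and the AT_RSEQ_* constants, and a libc that has not already registered rseq for the thread (recent glibc does, making the raw syscall fail with EBUSY unless disabled, e.g. via GLIBC_TUNABLES=glibc.pthread.rseq=0). RSEQ_SIG is an arbitrary example signature.

#define _GNU_SOURCE
#include <linux/rseq.h>		/* UAPI struct rseq with node_id/mm_cid */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/auxv.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef AT_RSEQ_FEATURE_SIZE
#define AT_RSEQ_FEATURE_SIZE	27	/* bytes of struct rseq the kernel maintains */
#define AT_RSEQ_ALIGN		28	/* required alignment of the rseq area */
#endif

#define ORIG_RSEQ_SIZE	32		/* original ABI size, matching the patch */
#define RSEQ_SIG	0x53053053	/* example signature; any constant works */

int main(void)
{
	unsigned long feature_size = getauxval(AT_RSEQ_FEATURE_SIZE);
	unsigned long align = getauxval(AT_RSEQ_ALIGN);
	uint32_t rseq_len;
	struct rseq *rs;

	/* Pre-extension kernels do not publish the rseq auxv entries. */
	if (!feature_size)
		feature_size = ORIG_RSEQ_SIZE;
	if (!align)
		align = ORIG_RSEQ_SIZE;
	/* The kernel rejects any length below ORIG_RSEQ_SIZE. */
	rseq_len = feature_size < ORIG_RSEQ_SIZE ? ORIG_RSEQ_SIZE : feature_size;

	/* Round the allocation up to a multiple of the alignment. */
	rs = aligned_alloc(align, (rseq_len + align - 1) & ~(align - 1));
	if (!rs)
		return 1;
	memset(rs, 0, rseq_len);

	if (syscall(__NR_rseq, rs, rseq_len, 0, RSEQ_SIG)) {
		perror("rseq register");
		return 1;
	}
	/* Kept up to date by the kernel across preemption and migration. */
	printf("cpu_id=%u node_id=%u mm_cid=%u\n",
	       rs->cpu_id, rs->node_id, rs->mm_cid);
	return 0;
}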
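Unregistration goes through the same syscall with RSEQ_FLAG_UNREGISTER and, per the rseq_len != current->rseq_len and rseq_sig checks added above, must repeat the exact address, length and signature used at registration. Continuing the sketch:

	/* Mismatched length yields -EINVAL, mismatched signature -EPERM. */
	if (syscall(__NR_rseq, rs, rseq_len, RSEQ_FLAG_UNREGISTER, RSEQ_SIG))
		perror("rseq unregister");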
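Because AT_RSEQ_FEATURE_SIZE reports how many bytes of struct rseq the kernel actually updates, a library can probe for the newer fields before relying on them. A hypothetical helper (the name is illustrative):

#ifndef AT_RSEQ_FEATURE_SIZE
#define AT_RSEQ_FEATURE_SIZE	27
#endif

#include <stddef.h>
#include <sys/auxv.h>
#include <linux/rseq.h>

/* True when the kernel maintains rseq->mm_cid for registered threads. */
static inline int rseq_mm_cid_available(void)
{
	unsigned long feature_size = getauxval(AT_RSEQ_FEATURE_SIZE);

	return feature_size >= offsetof(struct rseq, mm_cid) + sizeof(__u32);
}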