From ee3e3ac05c2631ce1f12d88c9cc9a092f8fe947a Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 22 Nov 2022 15:39:05 -0500 Subject: rseq: Introduce extensible rseq ABI Introduce the extensible rseq ABI, where the feature size supported by the kernel and the required alignment are communicated to user-space through ELF auxiliary vectors. This allows user-space to call rseq registration with a rseq_len of either 32 bytes for the original struct rseq size (which includes padding), or larger. If rseq_len is larger than 32 bytes, then it must be large enough to contain the feature size communicated to user-space through ELF auxiliary vectors. Signed-off-by: Mathieu Desnoyers Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20221122203932.231377-4-mathieu.desnoyers@efficios.com --- kernel/rseq.c | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) (limited to 'kernel/rseq.c') diff --git a/kernel/rseq.c b/kernel/rseq.c index d38ab944105d..7962738455c9 100644 --- a/kernel/rseq.c +++ b/kernel/rseq.c @@ -18,6 +18,9 @@ #define CREATE_TRACE_POINTS #include +/* The original rseq structure size (including padding) is 32 bytes. */ +#define ORIG_RSEQ_SIZE 32 + #define RSEQ_CS_NO_RESTART_FLAGS (RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT | \ RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL | \ RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE) @@ -87,10 +90,15 @@ static int rseq_update_cpu_id(struct task_struct *t) u32 cpu_id = raw_smp_processor_id(); struct rseq __user *rseq = t->rseq; - if (!user_write_access_begin(rseq, sizeof(*rseq))) + if (!user_write_access_begin(rseq, t->rseq_len)) goto efault; unsafe_put_user(cpu_id, &rseq->cpu_id_start, efault_end); unsafe_put_user(cpu_id, &rseq->cpu_id, efault_end); + /* + * Additional feature fields added after ORIG_RSEQ_SIZE + * need to be conditionally updated only if + * t->rseq_len != ORIG_RSEQ_SIZE. + */ user_write_access_end(); trace_rseq_update(t); return 0; @@ -117,6 +125,11 @@ static int rseq_reset_rseq_cpu_id(struct task_struct *t) */ if (put_user(cpu_id, &t->rseq->cpu_id)) return -EFAULT; + /* + * Additional feature fields added after ORIG_RSEQ_SIZE + * need to be conditionally reset only if + * t->rseq_len != ORIG_RSEQ_SIZE. + */ return 0; } @@ -344,7 +357,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, /* Unregister rseq for current thread. */ if (current->rseq != rseq || !current->rseq) return -EINVAL; - if (rseq_len != sizeof(*rseq)) + if (rseq_len != current->rseq_len) return -EINVAL; if (current->rseq_sig != sig) return -EPERM; @@ -353,6 +366,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, return ret; current->rseq = NULL; current->rseq_sig = 0; + current->rseq_len = 0; return 0; } @@ -365,7 +379,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, * the provided address differs from the prior * one. */ - if (current->rseq != rseq || rseq_len != sizeof(*rseq)) + if (current->rseq != rseq || rseq_len != current->rseq_len) return -EINVAL; if (current->rseq_sig != sig) return -EPERM; @@ -374,15 +388,24 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, } /* - * If there was no rseq previously registered, - * ensure the provided rseq is properly aligned and valid. + * If there was no rseq previously registered, ensure the provided rseq + * is properly aligned, as communcated to user-space through the ELF + * auxiliary vector AT_RSEQ_ALIGN. If rseq_len is the original rseq + * size, the required alignment is the original struct rseq alignment. + * + * In order to be valid, rseq_len is either the original rseq size, or + * large enough to contain all supported fields, as communicated to + * user-space through the ELF auxiliary vector AT_RSEQ_FEATURE_SIZE. */ - if (!IS_ALIGNED((unsigned long)rseq, __alignof__(*rseq)) || - rseq_len != sizeof(*rseq)) + if (rseq_len < ORIG_RSEQ_SIZE || + (rseq_len == ORIG_RSEQ_SIZE && !IS_ALIGNED((unsigned long)rseq, ORIG_RSEQ_SIZE)) || + (rseq_len != ORIG_RSEQ_SIZE && (!IS_ALIGNED((unsigned long)rseq, __alignof__(*rseq)) || + rseq_len < offsetof(struct rseq, end)))) return -EINVAL; if (!access_ok(rseq, rseq_len)) return -EFAULT; current->rseq = rseq; + current->rseq_len = rseq_len; current->rseq_sig = sig; /* * If rseq was previously inactive, and has just been -- cgit