diff options
Diffstat (limited to 'tools/testing/selftests/rseq/rseq.c')
| -rw-r--r-- | tools/testing/selftests/rseq/rseq.c | 308 |
1 files changed, 308 insertions, 0 deletions
diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c new file mode 100644 index 000000000000..a736727b83c1 --- /dev/null +++ b/tools/testing/selftests/rseq/rseq.c @@ -0,0 +1,308 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * rseq.c + * + * Copyright (C) 2016 Mathieu Desnoyers <mathieu.desnoyers@efficios.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; only + * version 2.1 of the License. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + */ + +#define _GNU_SOURCE +#include <errno.h> +#include <sched.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <syscall.h> +#include <assert.h> +#include <signal.h> +#include <limits.h> +#include <dlfcn.h> +#include <stddef.h> +#include <sys/auxv.h> +#include <linux/auxvec.h> + +#include <linux/compiler.h> + +#include "kselftest.h" +#include "rseq.h" + +/* + * Define weak versions to play nice with binaries that are statically linked + * against a libc that doesn't support registering its own rseq. + */ +extern __weak ptrdiff_t __rseq_offset; +extern __weak unsigned int __rseq_size; +extern __weak unsigned int __rseq_flags; + +static const ptrdiff_t *libc_rseq_offset_p = &__rseq_offset; +static const unsigned int *libc_rseq_size_p = &__rseq_size; +static const unsigned int *libc_rseq_flags_p = &__rseq_flags; + +/* Offset from the thread pointer to the rseq area. */ +ptrdiff_t rseq_offset; + +/* + * Size of the registered rseq area. 0 if the registration was + * unsuccessful. + */ +unsigned int rseq_size = -1U; + +/* Flags used during rseq registration. */ +unsigned int rseq_flags; + +static int rseq_ownership; + +/* Allocate a large area for the TLS. */ +#define RSEQ_THREAD_AREA_ALLOC_SIZE 1024 + +/* Original struct rseq feature size is 20 bytes. */ +#define ORIG_RSEQ_FEATURE_SIZE 20 + +/* Original struct rseq allocation size is 32 bytes. */ +#define ORIG_RSEQ_ALLOC_SIZE 32 + +/* + * Use a union to ensure we allocate a TLS area of 1024 bytes to accomodate an + * rseq registration that is larger than the current rseq ABI. + */ +union rseq_tls { + struct rseq_abi abi; + char dummy[RSEQ_THREAD_AREA_ALLOC_SIZE]; +}; + +static +__thread union rseq_tls __rseq __attribute__((tls_model("initial-exec"))) = { + .abi = { + .cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED, + }, +}; + +static int sys_rseq(struct rseq_abi *rseq_abi, uint32_t rseq_len, + int flags, uint32_t sig) +{ + return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig); +} + +static int sys_getcpu(unsigned *cpu, unsigned *node) +{ + return syscall(__NR_getcpu, cpu, node, NULL); +} + +bool rseq_available(void) +{ + int rc; + + rc = sys_rseq(NULL, 0, 0, 0); + if (rc != -1) + abort(); + switch (errno) { + case ENOSYS: + return false; + case EINVAL: + return true; + default: + abort(); + } +} + +/* The rseq areas need to be at least 32 bytes. */ +static +unsigned int get_rseq_min_alloc_size(void) +{ + unsigned int alloc_size = rseq_size; + + if (alloc_size < ORIG_RSEQ_ALLOC_SIZE) + alloc_size = ORIG_RSEQ_ALLOC_SIZE; + return alloc_size; +} + +/* + * Return the feature size supported by the kernel. + * + * Depending on the value returned by getauxval(AT_RSEQ_FEATURE_SIZE): + * + * 0: Return ORIG_RSEQ_FEATURE_SIZE (20) + * > 0: Return the value from getauxval(AT_RSEQ_FEATURE_SIZE). + * + * It should never return a value below ORIG_RSEQ_FEATURE_SIZE. + */ +static +unsigned int get_rseq_kernel_feature_size(void) +{ + unsigned long auxv_rseq_feature_size, auxv_rseq_align; + + auxv_rseq_align = getauxval(AT_RSEQ_ALIGN); + assert(!auxv_rseq_align || auxv_rseq_align <= RSEQ_THREAD_AREA_ALLOC_SIZE); + + auxv_rseq_feature_size = getauxval(AT_RSEQ_FEATURE_SIZE); + assert(!auxv_rseq_feature_size || auxv_rseq_feature_size <= RSEQ_THREAD_AREA_ALLOC_SIZE); + if (auxv_rseq_feature_size) + return auxv_rseq_feature_size; + else + return ORIG_RSEQ_FEATURE_SIZE; +} + +int rseq_register_current_thread(void) +{ + int rc; + + if (!rseq_ownership) { + /* Treat libc's ownership as a successful registration. */ + return 0; + } + rc = sys_rseq(&__rseq.abi, get_rseq_min_alloc_size(), 0, RSEQ_SIG); + if (rc) { + /* + * After at least one thread has registered successfully + * (rseq_size > 0), the registration of other threads should + * never fail. + */ + if (RSEQ_READ_ONCE(rseq_size) > 0) { + /* Incoherent success/failure within process. */ + abort(); + } + return -1; + } + assert(rseq_current_cpu_raw() >= 0); + + /* + * The first thread to register sets the rseq_size to mimic the libc + * behavior. + */ + if (RSEQ_READ_ONCE(rseq_size) == 0) { + RSEQ_WRITE_ONCE(rseq_size, get_rseq_kernel_feature_size()); + } + + return 0; +} + +int rseq_unregister_current_thread(void) +{ + int rc; + + if (!rseq_ownership) { + /* Treat libc's ownership as a successful unregistration. */ + return 0; + } + rc = sys_rseq(&__rseq.abi, get_rseq_min_alloc_size(), RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); + if (rc) + return -1; + return 0; +} + +static __attribute__((constructor)) +void rseq_init(void) +{ + /* + * If the libc's registered rseq size isn't already valid, it may be + * because the binary is dynamically linked and not necessarily due to + * libc not having registered a restartable sequence. Try to find the + * symbols if that's the case. + */ + if (!libc_rseq_size_p || !*libc_rseq_size_p) { + libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset"); + libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size"); + libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags"); + } + if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p && + *libc_rseq_size_p != 0) { + unsigned int libc_rseq_size; + + /* rseq registration owned by glibc */ + rseq_offset = *libc_rseq_offset_p; + libc_rseq_size = *libc_rseq_size_p; + rseq_flags = *libc_rseq_flags_p; + + /* + * Previous versions of glibc expose the value + * 32 even though the kernel only supported 20 + * bytes initially. Therefore treat 32 as a + * special-case. glibc 2.40 exposes a 20 bytes + * __rseq_size without using getauxval(3) to + * query the supported size, while still allocating a 32 + * bytes area. Also treat 20 as a special-case. + * + * Special-cases are handled by using the following + * value as active feature set size: + * + * rseq_size = min(32, get_rseq_kernel_feature_size()) + */ + switch (libc_rseq_size) { + case ORIG_RSEQ_FEATURE_SIZE: + fallthrough; + case ORIG_RSEQ_ALLOC_SIZE: + { + unsigned int rseq_kernel_feature_size = get_rseq_kernel_feature_size(); + + if (rseq_kernel_feature_size < ORIG_RSEQ_ALLOC_SIZE) + rseq_size = rseq_kernel_feature_size; + else + rseq_size = ORIG_RSEQ_ALLOC_SIZE; + break; + } + default: + /* Otherwise just use the __rseq_size from libc as rseq_size. */ + rseq_size = libc_rseq_size; + break; + } + return; + } + rseq_ownership = 1; + + /* Calculate the offset of the rseq area from the thread pointer. */ + rseq_offset = (void *)&__rseq.abi - rseq_thread_pointer(); + + /* rseq flags are deprecated, always set to 0. */ + rseq_flags = 0; + + /* + * Set the size to 0 until at least one thread registers to mimic the + * libc behavior. + */ + rseq_size = 0; +} + +static __attribute__((destructor)) +void rseq_exit(void) +{ + if (!rseq_ownership) + return; + rseq_offset = 0; + rseq_size = -1U; + rseq_ownership = 0; +} + +int32_t rseq_fallback_current_cpu(void) +{ + int32_t cpu; + + cpu = sched_getcpu(); + if (cpu < 0) { + perror("sched_getcpu()"); + abort(); + } + return cpu; +} + +int32_t rseq_fallback_current_node(void) +{ + uint32_t cpu_id, node_id; + int ret; + + ret = sys_getcpu(&cpu_id, &node_id); + if (ret) { + perror("sys_getcpu()"); + return ret; + } + return (int32_t) node_id; +} |
