// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2025 Igalia S.L.
 *
 * Robust list test by André Almeida
 *
 * The robust list uAPI allows userspace to create "robust" locks, in the sense
 * that if the lock holder thread dies, the remaining threads that are waiting
 * for the lock won't block forever, waiting for a lock that will never be
 * released.
 *
 * This is achieved by userspace keeping a list of all the locks (futexes) that
 * a thread is holding. The robust list is a linked list, and userspace
 * registers the head of the list with the syscall set_robust_list(). If such a
 * thread eventually dies, the kernel walks this list, waking up one waiter for
 * each futex and marking the futex word with the flag FUTEX_OWNER_DIED.
 *
 * See also
 *	man set_robust_list
 *	Documentation/locking/robust-futex-ABI.rst
 *	Documentation/locking/robust-futexes.rst
 */

#define _GNU_SOURCE

#include "futextest.h"
#include "../../kselftest_harness.h"

#include <errno.h>
#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <sys/mman.h>
#include <sys/wait.h>

#define STACK_SIZE (1024 * 1024)

#define FUTEX_TIMEOUT 3		/* seconds, for futex_wait() */

#define SLEEP_US 100		/* microseconds */

static pthread_barrier_t barrier, barrier2;

static int set_robust_list(struct robust_list_head *head, size_t len)
{
	return syscall(SYS_set_robust_list, head, len);
}

static int get_robust_list(int pid, struct robust_list_head **head, size_t *len_ptr)
{
	return syscall(SYS_get_robust_list, pid, head, len_ptr);
}

/*
 * Basic lock struct, contains just the futex word and the robust list element.
 * Real implementations also have a *prev pointer, to walk the list easily.
 */
struct lock_struct {
	_Atomic(unsigned int) futex;
	struct robust_list list;
};

/*
 * Helper function to spawn a child thread. Returns -1 on error, pid on success
 */
static int create_child(int (*fn)(void *arg), void *arg)
{
	char *stack;
	pid_t pid;

	stack = mmap(NULL, STACK_SIZE, PROT_READ | PROT_WRITE,
		     MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
	if (stack == MAP_FAILED)
		return -1;

	/* The stack grows down, so point clone() at the top of the mapping */
	stack += STACK_SIZE;

	pid = clone(fn, stack, CLONE_VM | SIGCHLD, arg);
	if (pid == -1)
		return -1;

	return pid;
}

/*
 * Helper function to prepare and register a robust list
 */
static int set_list(struct robust_list_head *head)
{
	int ret;

	ret = set_robust_list(head, sizeof(*head));
	if (ret)
		return ret;

	/*
	 * futex_offset tells the kernel how to reach the futex word from a
	 * robust list entry. Here the futex word sits before the list
	 * element, so the offset is negative.
	 */
	head->futex_offset = (size_t) offsetof(struct lock_struct, futex) -
			     (size_t) offsetof(struct lock_struct, list);
	head->list.next = &head->list;
	head->list_op_pending = NULL;

	return 0;
}

/*
 * A basic (and incomplete) mutex lock function with robustness
 */
static int mutex_lock(struct lock_struct *lock, struct robust_list_head *head,
		      bool error_inject)
{
	_Atomic(unsigned int) *futex = &lock->futex;
	unsigned int zero = 0;
	pid_t tid = gettid();
	int ret = -1;

	/*
	 * Set list_op_pending before starting the lock, so the kernel can
	 * catch the case where the thread died during the lock operation
	 */
	head->list_op_pending = &lock->list;

	if (atomic_compare_exchange_strong(futex, &zero, tid)) {
		/*
		 * We took the lock, insert it in the robust list
		 */
		struct robust_list *list = &head->list;

		/* Error injection to test list_op_pending */
		if (error_inject)
			return 0;

		while (list->next != &head->list)
			list = list->next;

		list->next = &lock->list;
		lock->list.next = &head->list;

		ret = 0;
	} else {
		/*
		 * We didn't take the lock, wait until the owner wakes (or dies)
		 */
		struct timespec to;

		to.tv_sec = FUTEX_TIMEOUT;
		to.tv_nsec = 0;

		tid = atomic_load(futex);
		/* Kernel ignores futexes without the waiters flag */
		tid |= FUTEX_WAITERS;
		atomic_store(futex, tid);

		ret = futex_wait((futex_t *) futex, tid, &to, 0);

		/*
		 * A real mutex_lock() implementation would loop here to
		 * finally take the lock. We don't care about that, so we
		 * stop here.
		 */
	}

	head->list_op_pending = NULL;

	return ret;
}
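/*
 * Not exercised by the tests below: a hypothetical, minimal sketch of the
 * matching unlock side, under the same simplifying assumptions as
 * mutex_lock() above. A robust unlock removes the lock from the robust list
 * before releasing the futex word, so the kernel never finds an
 * already-released lock when it walks the list. With this singly linked list
 * we must walk to the predecessor; real implementations keep a *prev pointer
 * instead. futex_wake() comes from futextest.h.
 */
static inline int mutex_unlock(struct lock_struct *lock,
			       struct robust_list_head *head)
{
	struct robust_list *list = &head->list;
	unsigned int old;

	/* As in mutex_lock(), flag the in-flight operation for the kernel */
	head->list_op_pending = &lock->list;

	/* Unlink the lock from the robust list, if it is there */
	while (list->next != &lock->list && list->next != &head->list)
		list = list->next;

	if (list->next == &lock->list)
		list->next = lock->list.next;

	/* Release the futex word, remembering whether anyone was waiting */
	old = atomic_exchange(&lock->futex, 0);
	head->list_op_pending = NULL;

	if (old & FUTEX_WAITERS)
		return futex_wake((futex_t *) &lock->futex, 1, 0);

	return 0;
}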
/*
 * This child thread will succeed taking the lock, and then will exit holding it
 */
static int child_fn_lock(void *arg)
{
	struct lock_struct *lock = arg;
	struct robust_list_head head;
	int ret;

	ret = set_list(&head);
	if (ret) {
		ksft_test_result_fail("set_robust_list error\n");
		return ret;
	}

	ret = mutex_lock(lock, &head, false);
	if (ret) {
		ksft_test_result_fail("mutex_lock error\n");
		return ret;
	}

	pthread_barrier_wait(&barrier);

	/*
	 * There's a race here: the parent thread needs to be inside
	 * futex_wait() before the child thread dies, otherwise it will miss
	 * the wakeup from handle_futex_death() that this child will emit. We
	 * wait a little bit just to make sure that this happens.
	 */
	usleep(SLEEP_US);

	return 0;
}

/*
 * Spawns a child thread that will set a robust list, take the lock, register
 * it in the robust list and die. The parent thread will wait on this futex,
 * and should be woken up when the child exits.
 */
TEST(test_robustness)
{
	struct lock_struct lock = { .futex = 0 };
	_Atomic(unsigned int) *futex = &lock.futex;
	struct robust_list_head head;
	int ret, pid, wstatus;

	ret = set_list(&head);
	ASSERT_EQ(ret, 0);

	/*
	 * Let's use a barrier to ensure that the child thread takes the lock
	 * before the parent
	 */
	ret = pthread_barrier_init(&barrier, NULL, 2);
	ASSERT_EQ(ret, 0);

	pid = create_child(&child_fn_lock, &lock);
	ASSERT_NE(pid, -1);

	pthread_barrier_wait(&barrier);
	ret = mutex_lock(&lock, &head, false);

	/*
	 * futex_wait() should return 0 and the futex word should be marked
	 * with FUTEX_OWNER_DIED
	 */
	ASSERT_EQ(ret, 0);
	ASSERT_TRUE(*futex & FUTEX_OWNER_DIED);

	wait(&wstatus);
	pthread_barrier_destroy(&barrier);

	/* Pass only if the child hasn't returned an error */
	if (!WEXITSTATUS(wstatus))
		ksft_test_result_pass("%s\n", __func__);
}

/*
 * The only valid value for len is sizeof(*head)
 */
TEST(test_set_robust_list_invalid_size)
{
	struct robust_list_head head;
	size_t head_size = sizeof(head);
	int ret;

	ret = set_robust_list(&head, head_size);
	ASSERT_EQ(ret, 0);

	ret = set_robust_list(&head, head_size * 2);
	ASSERT_EQ(ret, -1);
	ASSERT_EQ(errno, EINVAL);

	ret = set_robust_list(&head, head_size - 1);
	ASSERT_EQ(ret, -1);
	ASSERT_EQ(errno, EINVAL);

	ret = set_robust_list(&head, 0);
	ASSERT_EQ(ret, -1);
	ASSERT_EQ(errno, EINVAL);

	ksft_test_result_pass("%s\n", __func__);
}

/*
 * Test get_robust_list with pid = 0, getting the list of the running thread
 */
TEST(test_get_robust_list_self)
{
	struct robust_list_head head, head2, *get_head;
	size_t head_size = sizeof(head), len_ptr;
	int ret;

	ret = set_robust_list(&head, head_size);
	ASSERT_EQ(ret, 0);

	ret = get_robust_list(0, &get_head, &len_ptr);
	ASSERT_EQ(ret, 0);
	ASSERT_EQ(get_head, &head);
	ASSERT_EQ(head_size, len_ptr);

	ret = set_robust_list(&head2, head_size);
	ASSERT_EQ(ret, 0);

	ret = get_robust_list(0, &get_head, &len_ptr);
	ASSERT_EQ(ret, 0);
	ASSERT_EQ(get_head, &head2);
	ASSERT_EQ(head_size, len_ptr);

	ksft_test_result_pass("%s\n", __func__);
}
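/*
 * A sketch of a negative case, added for illustration and not part of the
 * original test plan, assuming the usual syscall error convention:
 * get_robust_list() should fail with ESRCH when the target thread does not
 * exist. A negative TID can never name a task, so it serves as a
 * guaranteed-invalid target here.
 */
TEST(test_get_robust_list_invalid_pid)
{
	struct robust_list_head *get_head;
	size_t len_ptr;
	int ret;

	ret = get_robust_list(-1, &get_head, &len_ptr);
	ASSERT_EQ(ret, -1);
	ASSERT_EQ(errno, ESRCH);

	ksft_test_result_pass("%s\n", __func__);
}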
static int child_list(void *arg)
{
	struct robust_list_head *head = arg;
	int ret;

	ret = set_robust_list(head, sizeof(*head));
	if (ret) {
		ksft_test_result_fail("set_robust_list error\n");
		return -1;
	}

	/*
	 * After setting the list head, wait until the main thread can call
	 * get_robust_list() for this thread before exiting.
	 */
	pthread_barrier_wait(&barrier);
	pthread_barrier_wait(&barrier2);

	return 0;
}

/*
 * Test get_robust_list from another thread. We use two barriers here to
 * ensure that:
 *   1) the child thread sets the list before we try to get it from the parent
 *   2) the child thread is still alive when we try to get the list from it
 */
TEST(test_get_robust_list_child)
{
	struct robust_list_head head, *get_head;
	int ret, wstatus;
	size_t len_ptr;
	pid_t tid;

	ret = pthread_barrier_init(&barrier, NULL, 2);
	ASSERT_EQ(ret, 0);
	ret = pthread_barrier_init(&barrier2, NULL, 2);
	ASSERT_EQ(ret, 0);

	tid = create_child(&child_list, &head);
	ASSERT_NE(tid, -1);

	pthread_barrier_wait(&barrier);

	ret = get_robust_list(tid, &get_head, &len_ptr);
	ASSERT_EQ(ret, 0);
	ASSERT_EQ(&head, get_head);

	pthread_barrier_wait(&barrier2);

	wait(&wstatus);
	pthread_barrier_destroy(&barrier);
	pthread_barrier_destroy(&barrier2);

	/* Pass only if the child hasn't returned an error */
	if (!WEXITSTATUS(wstatus))
		ksft_test_result_pass("%s\n", __func__);
}

static int child_fn_lock_with_error(void *arg)
{
	struct lock_struct *lock = arg;
	struct robust_list_head head;
	int ret;

	ret = set_list(&head);
	if (ret) {
		ksft_test_result_fail("set_robust_list error\n");
		return -1;
	}

	ret = mutex_lock(lock, &head, true);
	if (ret) {
		ksft_test_result_fail("mutex_lock error\n");
		return -1;
	}

	pthread_barrier_wait(&barrier);

	/* See comment at child_fn_lock() */
	usleep(SLEEP_US);

	return 0;
}

/*
 * Same as the robustness test, but inject an error so that mutex_lock() exits
 * early, just after setting list_op_pending and taking the lock, to test the
 * list_op_pending mechanism
 */
TEST(test_set_list_op_pending)
{
	struct lock_struct lock = { .futex = 0 };
	_Atomic(unsigned int) *futex = &lock.futex;
	struct robust_list_head head;
	int ret, wstatus;

	ret = set_list(&head);
	ASSERT_EQ(ret, 0);

	ret = pthread_barrier_init(&barrier, NULL, 2);
	ASSERT_EQ(ret, 0);

	ret = create_child(&child_fn_lock_with_error, &lock);
	ASSERT_NE(ret, -1);

	pthread_barrier_wait(&barrier);
	ret = mutex_lock(&lock, &head, false);

	ASSERT_EQ(ret, 0);
	ASSERT_TRUE(*futex & FUTEX_OWNER_DIED);

	wait(&wstatus);
	pthread_barrier_destroy(&barrier);

	/* Pass only if the child hasn't returned an error */
	if (!WEXITSTATUS(wstatus))
		ksft_test_result_pass("%s\n", __func__);
	else
		ksft_test_result_fail("%s\n", __func__);
}

#define CHILD_NR 10

static int child_lock_holder(void *arg)
{
	struct lock_struct *locks = arg;
	struct robust_list_head head;
	int i;

	set_list(&head);

	for (i = 0; i < CHILD_NR; i++) {
		locks[i].futex = 0;
		mutex_lock(&locks[i], &head, false);
	}

	pthread_barrier_wait(&barrier);
	pthread_barrier_wait(&barrier2);

	/* See comment at child_fn_lock() */
	usleep(SLEEP_US);

	return 0;
}

static int child_wait_lock(void *arg)
{
	struct lock_struct *lock = arg;
	struct robust_list_head head;
	int ret;

	pthread_barrier_wait(&barrier2);
	ret = mutex_lock(lock, &head, false);

	if (ret) {
		ksft_test_result_fail("mutex_lock error\n");
		return -1;
	}

	if (!(lock->futex & FUTEX_OWNER_DIED)) {
		ksft_test_result_fail("futex not marked with FUTEX_OWNER_DIED\n");
		return -1;
	}

	return 0;
}
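/*
 * Illustrative helper, not used by the tests: while a lock is held, the low
 * bits of the futex word hold the owner's TID (that is what mutex_lock()
 * stores above), while FUTEX_WAITERS and FUTEX_OWNER_DIED live in the top
 * bits. FUTEX_TID_MASK comes from <linux/futex.h>, pulled in via futextest.h.
 */
static inline pid_t futex_owner_tid(struct lock_struct *lock)
{
	return atomic_load(&lock->futex) & FUTEX_TID_MASK;
}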
/*
 * Test a robust list with more than one element. All the waiters should be
 * woken when the holder dies
 */
TEST(test_robust_list_multiple_elements)
{
	struct lock_struct locks[CHILD_NR];
	pid_t pids[CHILD_NR + 1];
	int i, ret, wstatus;

	ret = pthread_barrier_init(&barrier, NULL, 2);
	ASSERT_EQ(ret, 0);
	ret = pthread_barrier_init(&barrier2, NULL, CHILD_NR + 1);
	ASSERT_EQ(ret, 0);

	pids[0] = create_child(&child_lock_holder, locks);

	/* Wait until the locker thread takes the lock */
	pthread_barrier_wait(&barrier);

	for (i = 0; i < CHILD_NR; i++)
		pids[i + 1] = create_child(&child_wait_lock, &locks[i]);

	/* Wait for all children (the holder and every waiter) to return */
	ret = 0;
	for (i = 0; i < CHILD_NR + 1; i++) {
		waitpid(pids[i], &wstatus, 0);
		if (WEXITSTATUS(wstatus))
			ret = -1;
	}

	pthread_barrier_destroy(&barrier);
	pthread_barrier_destroy(&barrier2);

	/* Pass only if no child returned an error */
	if (!ret)
		ksft_test_result_pass("%s\n", __func__);
}

static int child_circular_list(void *arg)
{
	static struct robust_list_head head;
	struct lock_struct a, b, c;
	int ret;

	ret = set_list(&head);
	if (ret) {
		ksft_test_result_fail("set_list error\n");
		return -1;
	}

	head.list.next = &a.list;

	/*
	 * The last element should point back to the list head, but we short
	 * circuit it to create a cycle
	 */
	a.list.next = &b.list;
	b.list.next = &c.list;
	c.list.next = &a.list;

	return 0;
}

/*
 * Create a circular robust list. The kernel should be able to handle such a
 * list while processing it, so it won't be trapped in an infinite loop while
 * handling a process exit
 */
TEST(test_circular_list)
{
	int wstatus;

	create_child(child_circular_list, NULL);

	wait(&wstatus);

	/* Pass only if the child hasn't returned an error */
	if (!WEXITSTATUS(wstatus))
		ksft_test_result_pass("%s\n", __func__);
}

TEST_HARNESS_MAIN