summaryrefslogtreecommitdiff
path: root/kernel/futex/waitwake.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/futex/waitwake.c')
-rw-r--r--kernel/futex/waitwake.c336
1 files changed, 190 insertions, 146 deletions
diff --git a/kernel/futex/waitwake.c b/kernel/futex/waitwake.c
index ba01b9408203..e2bbe5509ec2 100644
--- a/kernel/futex/waitwake.c
+++ b/kernel/futex/waitwake.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/plist.h>
#include <linux/sched/task.h>
#include <linux/sched/signal.h>
#include <linux/freezer.h>
@@ -106,20 +107,11 @@
* double_lock_hb() and double_unlock_hb(), respectively.
*/
-/*
- * The hash bucket lock must be held when this is called.
- * Afterwards, the futex_q must not be accessed. Callers
- * must ensure to later call wake_up_q() for the actual
- * wakeups to occur.
- */
-void futex_wake_mark(struct wake_q_head *wake_q, struct futex_q *q)
+bool __futex_wake_mark(struct futex_q *q)
{
- struct task_struct *p = q->task;
-
if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n"))
- return;
+ return false;
- get_task_struct(p);
__futex_unqueue(q);
/*
* The waiting task can free the futex_q as soon as q->lock_ptr = NULL
@@ -130,6 +122,26 @@ void futex_wake_mark(struct wake_q_head *wake_q, struct futex_q *q)
*/
smp_store_release(&q->lock_ptr, NULL);
+ return true;
+}
+
+/*
+ * The hash bucket lock must be held when this is called.
+ * Afterwards, the futex_q must not be accessed. Callers
+ * must ensure to later call wake_up_q() for the actual
+ * wakeups to occur.
+ */
+void futex_wake_mark(struct wake_q_head *wake_q, struct futex_q *q)
+{
+ struct task_struct *p = q->task;
+
+ get_task_struct(p);
+
+ if (!__futex_wake_mark(q)) {
+ put_task_struct(p);
+ return;
+ }
+
/*
* Queue the task for later wakeup for after we've released
* the hb->lock.
@@ -142,20 +154,22 @@ void futex_wake_mark(struct wake_q_head *wake_q, struct futex_q *q)
*/
int futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
{
- struct futex_hash_bucket *hb;
struct futex_q *this, *next;
union futex_key key = FUTEX_KEY_INIT;
- int ret;
DEFINE_WAKE_Q(wake_q);
+ int ret;
if (!bitset)
return -EINVAL;
- ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_READ);
+ ret = get_futex_key(uaddr, flags, &key, FUTEX_READ);
if (unlikely(ret != 0))
return ret;
- hb = futex_hash(&key);
+ if ((flags & FLAGS_STRICT) && !nr_wake)
+ return 0;
+
+ CLASS(hb, hb)(&key);
/* Make sure we really have tasks to wakeup */
if (!futex_hb_waiters_pending(hb))
@@ -174,7 +188,7 @@ int futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
if (!(this->bitset & bitset))
continue;
- futex_wake_mark(&wake_q, this);
+ this->wake(&wake_q, this);
if (++ret >= nr_wake)
break;
}
@@ -195,13 +209,12 @@ static int futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr)
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) {
if (oparg < 0 || oparg > 31) {
- char comm[sizeof(current->comm)];
/*
* kill this print and return -EINVAL when userspace
* is sane again
*/
pr_info_ratelimited("futex_wake_op: %s tries to shift op by %d; fix this program\n",
- get_task_comm(comm, current), oparg);
+ current->comm, oparg);
oparg &= 31;
}
oparg = 1 << oparg;
@@ -239,80 +252,81 @@ int futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
int nr_wake, int nr_wake2, int op)
{
union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
- struct futex_hash_bucket *hb1, *hb2;
struct futex_q *this, *next;
int ret, op_ret;
DEFINE_WAKE_Q(wake_q);
retry:
- ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ);
+ ret = get_futex_key(uaddr1, flags, &key1, FUTEX_READ);
if (unlikely(ret != 0))
return ret;
- ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE);
+ ret = get_futex_key(uaddr2, flags, &key2, FUTEX_WRITE);
if (unlikely(ret != 0))
return ret;
- hb1 = futex_hash(&key1);
- hb2 = futex_hash(&key2);
-
retry_private:
- double_lock_hb(hb1, hb2);
- op_ret = futex_atomic_op_inuser(op, uaddr2);
- if (unlikely(op_ret < 0)) {
- double_unlock_hb(hb1, hb2);
-
- if (!IS_ENABLED(CONFIG_MMU) ||
- unlikely(op_ret != -EFAULT && op_ret != -EAGAIN)) {
- /*
- * we don't get EFAULT from MMU faults if we don't have
- * an MMU, but we might get them from range checking
- */
- ret = op_ret;
- return ret;
- }
-
- if (op_ret == -EFAULT) {
- ret = fault_in_user_writeable(uaddr2);
- if (ret)
+ if (1) {
+ CLASS(hb, hb1)(&key1);
+ CLASS(hb, hb2)(&key2);
+
+ double_lock_hb(hb1, hb2);
+ op_ret = futex_atomic_op_inuser(op, uaddr2);
+ if (unlikely(op_ret < 0)) {
+ double_unlock_hb(hb1, hb2);
+
+ if (!IS_ENABLED(CONFIG_MMU) ||
+ unlikely(op_ret != -EFAULT && op_ret != -EAGAIN)) {
+ /*
+ * we don't get EFAULT from MMU faults if we don't have
+ * an MMU, but we might get them from range checking
+ */
+ ret = op_ret;
return ret;
- }
-
- cond_resched();
- if (!(flags & FLAGS_SHARED))
- goto retry_private;
- goto retry;
- }
+ }
- plist_for_each_entry_safe(this, next, &hb1->chain, list) {
- if (futex_match (&this->key, &key1)) {
- if (this->pi_state || this->rt_waiter) {
- ret = -EINVAL;
- goto out_unlock;
+ if (op_ret == -EFAULT) {
+ ret = fault_in_user_writeable(uaddr2);
+ if (ret)
+ return ret;
}
- futex_wake_mark(&wake_q, this);
- if (++ret >= nr_wake)
- break;
+
+ cond_resched();
+ if (!(flags & FLAGS_SHARED))
+ goto retry_private;
+ goto retry;
}
- }
- if (op_ret > 0) {
- op_ret = 0;
- plist_for_each_entry_safe(this, next, &hb2->chain, list) {
- if (futex_match (&this->key, &key2)) {
+ plist_for_each_entry_safe(this, next, &hb1->chain, list) {
+ if (futex_match(&this->key, &key1)) {
if (this->pi_state || this->rt_waiter) {
ret = -EINVAL;
goto out_unlock;
}
- futex_wake_mark(&wake_q, this);
- if (++op_ret >= nr_wake2)
+ this->wake(&wake_q, this);
+ if (++ret >= nr_wake)
break;
}
}
- ret += op_ret;
- }
+
+ if (op_ret > 0) {
+ op_ret = 0;
+ plist_for_each_entry_safe(this, next, &hb2->chain, list) {
+ if (futex_match(&this->key, &key2)) {
+ if (this->pi_state || this->rt_waiter) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+ this->wake(&wake_q, this);
+ if (++op_ret >= nr_wake2)
+ break;
+ }
+ }
+ ret += op_ret;
+ }
out_unlock:
- double_unlock_hb(hb1, hb2);
+ double_unlock_hb(hb1, hb2);
+ }
wake_up_q(&wake_q);
return ret;
}
@@ -320,23 +334,12 @@ out_unlock:
static long futex_wait_restart(struct restart_block *restart);
/**
- * futex_wait_queue() - futex_queue() and wait for wakeup, timeout, or signal
- * @hb: the futex hash bucket, must be locked by the caller
+ * futex_do_wait() - wait for wakeup, timeout, or signal
* @q: the futex_q to queue up on
* @timeout: the prepared hrtimer_sleeper, or null for no timeout
*/
-void futex_wait_queue(struct futex_hash_bucket *hb, struct futex_q *q,
- struct hrtimer_sleeper *timeout)
+void futex_do_wait(struct futex_q *q, struct hrtimer_sleeper *timeout)
{
- /*
- * The task state is guaranteed to be set before another task can
- * wake it. set_current_state() is implemented using smp_store_mb() and
- * futex_queue() calls spin_unlock() upon completion, both serializing
- * access to the hash list and forcing another memory barrier.
- */
- set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
- futex_queue(q, hb);
-
/* Arm the timer */
if (timeout)
hrtimer_sleeper_start_expires(timeout, HRTIMER_MODE_ABS);
@@ -358,7 +361,7 @@ void futex_wait_queue(struct futex_hash_bucket *hb, struct futex_q *q,
}
/**
- * unqueue_multiple - Remove various futexes from their hash bucket
+ * futex_unqueue_multiple - Remove various futexes from their hash bucket
* @v: The list of futexes to unqueue
* @count: Number of futexes in the list
*
@@ -368,7 +371,7 @@ void futex_wait_queue(struct futex_hash_bucket *hb, struct futex_q *q,
* - >=0 - Index of the last futex that was awoken;
* - -1 - No futex was awoken
*/
-static int unqueue_multiple(struct futex_vector *v, int count)
+int futex_unqueue_multiple(struct futex_vector *v, int count)
{
int ret = -1, i;
@@ -396,14 +399,19 @@ static int unqueue_multiple(struct futex_vector *v, int count)
* - 0 - Success
* - <0 - -EFAULT, -EWOULDBLOCK or -EINVAL
*/
-static int futex_wait_multiple_setup(struct futex_vector *vs, int count, int *woken)
+int futex_wait_multiple_setup(struct futex_vector *vs, int count, int *woken)
{
- struct futex_hash_bucket *hb;
bool retry = false;
int ret, i;
u32 uval;
/*
+ * Make sure to have a reference on the private_hash such that we
+ * don't block on rehash after changing the task state below.
+ */
+ guard(private_hash)();
+
+ /*
* Enqueuing multiple futexes is tricky, because we need to enqueue
* each futex on the list before dealing with the next one to avoid
* deadlocking on the hash bucket. But, before enqueuing, we need to
@@ -419,11 +427,11 @@ static int futex_wait_multiple_setup(struct futex_vector *vs, int count, int *wo
*/
retry:
for (i = 0; i < count; i++) {
- if ((vs[i].w.flags & FUTEX_PRIVATE_FLAG) && retry)
+ if (!(vs[i].w.flags & FLAGS_SHARED) && retry)
continue;
ret = get_futex_key(u64_to_user_ptr(vs[i].w.uaddr),
- !(vs[i].w.flags & FUTEX_PRIVATE_FLAG),
+ vs[i].w.flags,
&vs[i].q.key, FUTEX_READ);
if (unlikely(ret))
@@ -435,22 +443,26 @@ retry:
for (i = 0; i < count; i++) {
u32 __user *uaddr = (u32 __user *)(unsigned long)vs[i].w.uaddr;
struct futex_q *q = &vs[i].q;
- u32 val = (u32)vs[i].w.val;
+ u32 val = vs[i].w.val;
- hb = futex_q_lock(q);
- ret = futex_get_value_locked(&uval, uaddr);
+ if (1) {
+ CLASS(hb, hb)(&q->key);
- if (!ret && uval == val) {
- /*
- * The bucket lock can't be held while dealing with the
- * next futex. Queue each futex at this moment so hb can
- * be unlocked.
- */
- futex_queue(q, hb);
- continue;
- }
+ futex_q_lock(q, hb);
+ ret = futex_get_value_locked(&uval, uaddr);
- futex_q_unlock(hb);
+ if (!ret && uval == val) {
+ /*
+ * The bucket lock can't be held while dealing with the
+ * next futex. Queue each futex at this moment so hb can
+ * be unlocked.
+ */
+ futex_queue(q, hb, current);
+ continue;
+ }
+
+ futex_q_unlock(hb);
+ }
__set_current_state(TASK_RUNNING);
/*
@@ -458,7 +470,7 @@ retry:
* was woken, we don't return error and return this index to
* userspace
*/
- *woken = unqueue_multiple(vs, i);
+ *woken = futex_unqueue_multiple(vs, i);
if (*woken >= 0)
return 1;
@@ -543,7 +555,7 @@ int futex_wait_multiple(struct futex_vector *vs, unsigned int count,
__set_current_state(TASK_RUNNING);
- ret = unqueue_multiple(vs, count);
+ ret = futex_unqueue_multiple(vs, count);
if (ret >= 0)
return ret;
@@ -564,7 +576,8 @@ int futex_wait_multiple(struct futex_vector *vs, unsigned int count,
* @val: the expected value
* @flags: futex flags (FLAGS_SHARED, etc.)
* @q: the associated futex_q
- * @hb: storage for hash_bucket pointer to be returned to caller
+ * @key2: the second futex_key if used for requeue PI
+ * @task: Task queueing this futex
*
* Setup the futex_q and locate the hash_bucket. Get the futex value and
* compare it with the expected value. Handle atomic faults internally.
@@ -572,10 +585,12 @@ int futex_wait_multiple(struct futex_vector *vs, unsigned int count,
*
* Return:
* - 0 - uaddr contains val and hb has been locked;
- * - <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked
+ * - <0 - On error and the hb is unlocked. A possible reason: the uaddr can not
+ * be read, does not contain the expected value or is not properly aligned.
*/
int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
- struct futex_q *q, struct futex_hash_bucket **hb)
+ struct futex_q *q, union futex_key *key2,
+ struct task_struct *task)
{
u32 uval;
int ret;
@@ -599,69 +614,84 @@ int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
* while the syscall executes.
*/
retry:
- ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, FUTEX_READ);
+ ret = get_futex_key(uaddr, flags, &q->key, FUTEX_READ);
if (unlikely(ret != 0))
return ret;
retry_private:
- *hb = futex_q_lock(q);
+ if (1) {
+ CLASS(hb, hb)(&q->key);
- ret = futex_get_value_locked(&uval, uaddr);
+ futex_q_lock(q, hb);
- if (ret) {
- futex_q_unlock(*hb);
+ ret = futex_get_value_locked(&uval, uaddr);
- ret = get_user(uval, uaddr);
- if (ret)
- return ret;
+ if (ret) {
+ futex_q_unlock(hb);
+
+ ret = get_user(uval, uaddr);
+ if (ret)
+ return ret;
- if (!(flags & FLAGS_SHARED))
- goto retry_private;
+ if (!(flags & FLAGS_SHARED))
+ goto retry_private;
- goto retry;
- }
+ goto retry;
+ }
- if (uval != val) {
- futex_q_unlock(*hb);
- ret = -EWOULDBLOCK;
+ if (uval != val) {
+ futex_q_unlock(hb);
+ return -EWOULDBLOCK;
+ }
+
+ if (key2 && futex_match(&q->key, key2)) {
+ futex_q_unlock(hb);
+ return -EINVAL;
+ }
+
+ /*
+ * The task state is guaranteed to be set before another task can
+ * wake it. set_current_state() is implemented using smp_store_mb() and
+ * futex_queue() calls spin_unlock() upon completion, both serializing
+ * access to the hash list and forcing another memory barrier.
+ */
+ if (task == current)
+ set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
+ futex_queue(q, hb, task);
}
return ret;
}
-int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, ktime_t *abs_time, u32 bitset)
+int __futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
+ struct hrtimer_sleeper *to, u32 bitset)
{
- struct hrtimer_sleeper timeout, *to;
- struct restart_block *restart;
- struct futex_hash_bucket *hb;
struct futex_q q = futex_q_init;
int ret;
if (!bitset)
return -EINVAL;
+
q.bitset = bitset;
- to = futex_setup_timer(abs_time, &timeout, flags,
- current->timer_slack_ns);
retry:
/*
* Prepare to wait on uaddr. On success, it holds hb->lock and q
* is initialized.
*/
- ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
+ ret = futex_wait_setup(uaddr, val, flags, &q, NULL, current);
if (ret)
- goto out;
+ return ret;
/* futex_queue and wait for wakeup, timeout, or a signal. */
- futex_wait_queue(hb, &q, to);
+ futex_do_wait(&q, to);
/* If we were woken (and unqueued), we succeeded, whatever. */
- ret = 0;
if (!futex_unqueue(&q))
- goto out;
- ret = -ETIMEDOUT;
+ return 0;
+
if (to && !to->task)
- goto out;
+ return -ETIMEDOUT;
/*
* We expect signal_pending(current), but we might be the
@@ -670,24 +700,38 @@ retry:
if (!signal_pending(current))
goto retry;
- ret = -ERESTARTSYS;
- if (!abs_time)
- goto out;
+ return -ERESTARTSYS;
+}
+
+int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, ktime_t *abs_time, u32 bitset)
+{
+ struct hrtimer_sleeper timeout, *to;
+ struct restart_block *restart;
+ int ret;
- restart = &current->restart_block;
- restart->futex.uaddr = uaddr;
- restart->futex.val = val;
- restart->futex.time = *abs_time;
- restart->futex.bitset = bitset;
- restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;
+ to = futex_setup_timer(abs_time, &timeout, flags,
+ current->timer_slack_ns);
- ret = set_restart_fn(restart, futex_wait_restart);
+ ret = __futex_wait(uaddr, flags, val, to, bitset);
+
+ /* No timeout, nothing to clean up. */
+ if (!to)
+ return ret;
-out:
- if (to) {
- hrtimer_cancel(&to->timer);
- destroy_hrtimer_on_stack(&to->timer);
+ hrtimer_cancel(&to->timer);
+ destroy_hrtimer_on_stack(&to->timer);
+
+ if (ret == -ERESTARTSYS) {
+ restart = &current->restart_block;
+ restart->futex.uaddr = uaddr;
+ restart->futex.val = val;
+ restart->futex.time = *abs_time;
+ restart->futex.bitset = bitset;
+ restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;
+
+ return set_restart_fn(restart, futex_wait_restart);
}
+
return ret;
}