summaryrefslogtreecommitdiff
path: root/kernel/futex.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/futex.c')
-rw-r--r--kernel/futex.c194
1 files changed, 60 insertions, 134 deletions
diff --git a/kernel/futex.c b/kernel/futex.c
index 0cf84c8664f2..b59532862bc0 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -135,8 +135,7 @@
*
* Where (A) orders the waiters increment and the futex value read through
* atomic operations (see hb_waiters_inc) and where (B) orders the write
- * to futex and the waiters read -- this is done by the barriers for both
- * shared and private futexes in get_futex_key_refs().
+ * to futex and the waiters read (see hb_waiters_pending()).
*
* This yields the following case (where X:=waiters, Y:=futex):
*
@@ -331,17 +330,6 @@ static void compat_exit_robust_list(struct task_struct *curr);
static inline void compat_exit_robust_list(struct task_struct *curr) { }
#endif
-static inline void futex_get_mm(union futex_key *key)
-{
- mmgrab(key->private.mm);
- /*
- * Ensure futex_get_mm() implies a full barrier such that
- * get_futex_key() implies a full barrier. This is relied upon
- * as smp_mb(); (B), see the ordering comment above.
- */
- smp_mb__after_atomic();
-}
-
/*
* Reflects a new waiter being added to the waitqueue.
*/
@@ -370,6 +358,10 @@ static inline void hb_waiters_dec(struct futex_hash_bucket *hb)
static inline int hb_waiters_pending(struct futex_hash_bucket *hb)
{
#ifdef CONFIG_SMP
+ /*
+ * Full barrier (B), see the ordering comment above.
+ */
+ smp_mb();
return atomic_read(&hb->waiters);
#else
return 1;
@@ -385,9 +377,9 @@ static inline int hb_waiters_pending(struct futex_hash_bucket *hb)
*/
static struct futex_hash_bucket *hash_futex(union futex_key *key)
{
- u32 hash = jhash2((u32*)&key->both.word,
- (sizeof(key->both.word)+sizeof(key->both.ptr))/4,
+ u32 hash = jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / 4,
key->both.offset);
+
return &futex_queues[hash & (futex_hashsize - 1)];
}
@@ -407,70 +399,6 @@ static inline int match_futex(union futex_key *key1, union futex_key *key2)
&& key1->both.offset == key2->both.offset);
}
-/*
- * Take a reference to the resource addressed by a key.
- * Can be called while holding spinlocks.
- *
- */
-static void get_futex_key_refs(union futex_key *key)
-{
- if (!key->both.ptr)
- return;
-
- /*
- * On MMU less systems futexes are always "private" as there is no per
- * process address space. We need the smp wmb nevertheless - yes,
- * arch/blackfin has MMU less SMP ...
- */
- if (!IS_ENABLED(CONFIG_MMU)) {
- smp_mb(); /* explicit smp_mb(); (B) */
- return;
- }
-
- switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
- case FUT_OFF_INODE:
- ihold(key->shared.inode); /* implies smp_mb(); (B) */
- break;
- case FUT_OFF_MMSHARED:
- futex_get_mm(key); /* implies smp_mb(); (B) */
- break;
- default:
- /*
- * Private futexes do not hold reference on an inode or
- * mm, therefore the only purpose of calling get_futex_key_refs
- * is because we need the barrier for the lockless waiter check.
- */
- smp_mb(); /* explicit smp_mb(); (B) */
- }
-}
-
-/*
- * Drop a reference to the resource addressed by a key.
- * The hash bucket spinlock must not be held. This is
- * a no-op for private futexes, see comment in the get
- * counterpart.
- */
-static void drop_futex_key_refs(union futex_key *key)
-{
- if (!key->both.ptr) {
- /* If we're here then we tried to put a key we failed to get */
- WARN_ON_ONCE(1);
- return;
- }
-
- if (!IS_ENABLED(CONFIG_MMU))
- return;
-
- switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
- case FUT_OFF_INODE:
- iput(key->shared.inode);
- break;
- case FUT_OFF_MMSHARED:
- mmdrop(key->private.mm);
- break;
- }
-}
-
enum futex_access {
FUTEX_READ,
FUTEX_WRITE
@@ -505,6 +433,46 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
return timeout;
}
+/*
+ * Generate a machine wide unique identifier for this inode.
+ *
+ * This relies on u64 not wrapping in the life-time of the machine; which with
+ * 1ns resolution means almost 585 years.
+ *
+ * This further relies on the fact that a well formed program will not unmap
+ * the file while it has a (shared) futex waiting on it. This mapping will have
+ * a file reference which pins the mount and inode.
+ *
+ * If for some reason an inode gets evicted and read back in again, it will get
+ * a new sequence number and will _NOT_ match, even though it is the exact same
+ * file.
+ *
+ * It is important that match_futex() will never have a false-positive, esp.
+ * for PI futexes that can mess up the state. The above argues that false-negatives
+ * are only possible for malformed programs.
+ */
+static u64 get_inode_sequence_number(struct inode *inode)
+{
+ static atomic64_t i_seq;
+ u64 old;
+
+ /* Does the inode already have a sequence number? */
+ old = atomic64_read(&inode->i_sequence);
+ if (likely(old))
+ return old;
+
+ for (;;) {
+ u64 new = atomic64_add_return(1, &i_seq);
+ if (WARN_ON_ONCE(!new))
+ continue;
+
+ old = atomic64_cmpxchg_relaxed(&inode->i_sequence, 0, new);
+ if (old)
+ return old;
+ return new;
+ }
+}
+
/**
* get_futex_key() - Get parameters which are the keys for a futex
* @uaddr: virtual address of the futex
@@ -517,9 +485,15 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
*
* The key words are stored in @key on success.
*
- * For shared mappings, it's (page->index, file_inode(vma->vm_file),
- * offset_within_page). For private mappings, it's (uaddr, current->mm).
- * We can usually work out the index without swapping in the page.
+ * For shared mappings (when @fshared), the key is:
+ * ( inode->i_sequence, page->index, offset_within_page )
+ * [ also see get_inode_sequence_number() ]
+ *
+ * For private mappings (or when !@fshared), the key is:
+ * ( current->mm, address, 0 )
+ *
+ * This allows (cross process, where applicable) identification of the futex
+ * without keeping the page pinned for the duration of the FUTEX_WAIT.
*
* lock_page() might sleep, the caller should not hold a spinlock.
*/
@@ -556,7 +530,6 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, enum futex_a
if (!fshared) {
key->private.mm = mm;
key->private.address = address;
- get_futex_key_refs(key); /* implies smp_mb(); (B) */
return 0;
}
@@ -659,8 +632,6 @@ again:
key->private.mm = mm;
key->private.address = address;
- get_futex_key_refs(key); /* implies smp_mb(); (B) */
-
} else {
struct inode *inode;
@@ -692,36 +663,8 @@ again:
goto again;
}
- /*
- * Take a reference unless it is about to be freed. Previously
- * this reference was taken by ihold under the page lock
- * pinning the inode in place so i_lock was unnecessary. The
- * only way for this check to fail is if the inode was
- * truncated in parallel which is almost certainly an
- * application bug. In such a case, just retry.
- *
- * We are not calling into get_futex_key_refs() in file-backed
- * cases, therefore a successful atomic_inc return below will
- * guarantee that get_futex_key() will still imply smp_mb(); (B).
- */
- if (!atomic_inc_not_zero(&inode->i_count)) {
- rcu_read_unlock();
- put_page(page);
-
- goto again;
- }
-
- /* Should be impossible but lets be paranoid for now */
- if (WARN_ON_ONCE(inode->i_mapping != mapping)) {
- err = -EFAULT;
- rcu_read_unlock();
- iput(inode);
-
- goto out;
- }
-
key->both.offset |= FUT_OFF_INODE; /* inode-based key */
- key->shared.inode = inode;
+ key->shared.i_seq = get_inode_sequence_number(inode);
key->shared.pgoff = basepage_index(tail);
rcu_read_unlock();
}
@@ -733,7 +676,6 @@ out:
static inline void put_futex_key(union futex_key *key)
{
- drop_futex_key_refs(key);
}
/**
@@ -1723,10 +1665,9 @@ static int futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr)
oparg = 1 << oparg;
}
- if (!access_ok(uaddr, sizeof(u32)))
- return -EFAULT;
-
+ pagefault_disable();
ret = arch_futex_atomic_op_inuser(op, oparg, &oldval, uaddr);
+ pagefault_enable();
if (ret)
return ret;
@@ -1868,7 +1809,6 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
plist_add(&q->list, &hb2->chain);
q->lock_ptr = &hb2->lock;
}
- get_futex_key_refs(key2);
q->key = *key2;
}
@@ -1890,7 +1830,6 @@ static inline
void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
struct futex_hash_bucket *hb)
{
- get_futex_key_refs(key);
q->key = *key;
__unqueue_futex(q);
@@ -2001,7 +1940,7 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
u32 *cmpval, int requeue_pi)
{
union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
- int drop_count = 0, task_count = 0, ret;
+ int task_count = 0, ret;
struct futex_pi_state *pi_state = NULL;
struct futex_hash_bucket *hb1, *hb2;
struct futex_q *this, *next;
@@ -2122,7 +2061,6 @@ retry_private:
*/
if (ret > 0) {
WARN_ON(pi_state);
- drop_count++;
task_count++;
/*
* If we acquired the lock, then the user space value
@@ -2242,7 +2180,6 @@ retry_private:
* doing so.
*/
requeue_pi_wake_futex(this, &key2, hb2);
- drop_count++;
continue;
} else if (ret) {
/*
@@ -2263,7 +2200,6 @@ retry_private:
}
}
requeue_futex(this, hb1, hb2, &key2);
- drop_count++;
}
/*
@@ -2278,15 +2214,6 @@ out_unlock:
wake_up_q(&wake_q);
hb_waiters_dec(hb2);
- /*
- * drop_futex_key_refs() must be called outside the spinlocks. During
- * the requeue we moved futex_q's from the hash bucket at key1 to the
- * one at key2 and updated their key pointer. We no longer need to
- * hold the references to key1.
- */
- while (--drop_count >= 0)
- drop_futex_key_refs(&key1);
-
out_put_keys:
put_futex_key(&key2);
out_put_key1:
@@ -2416,7 +2343,6 @@ retry:
ret = 1;
}
- drop_futex_key_refs(&q->key);
return ret;
}