Diffstat (limited to 'kernel/futex')
-rw-r--r--   kernel/futex/core.c  | 293
-rw-r--r--   kernel/futex/futex.h |   2
2 files changed, 241 insertions, 54 deletions
diff --git a/kernel/futex/core.c b/kernel/futex/core.c
index 565f9717c6ca..d9bb5567af0c 100644
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -42,7 +42,6 @@
 #include <linux/fault-inject.h>
 #include <linux/slab.h>
 #include <linux/prctl.h>
-#include <linux/rcuref.h>
 #include <linux/mempolicy.h>
 #include <linux/mmap_lock.h>
 
@@ -65,12 +64,11 @@ static struct {
 #define futex_queues    (__futex_data.queues)
 
 struct futex_private_hash {
-        rcuref_t        users;
+        int             state;
         unsigned int    hash_mask;
         struct rcu_head rcu;
         void            *mm;
         bool            custom;
-        bool            immutable;
         struct futex_hash_bucket queues[];
 };
 
@@ -129,6 +127,12 @@ static struct futex_hash_bucket *
 __futex_hash(union futex_key *key, struct futex_private_hash *fph);
 
 #ifdef CONFIG_FUTEX_PRIVATE_HASH
+static bool futex_ref_get(struct futex_private_hash *fph);
+static bool futex_ref_put(struct futex_private_hash *fph);
+static bool futex_ref_is_dead(struct futex_private_hash *fph);
+
+enum { FR_PERCPU = 0, FR_ATOMIC };
+
 static inline bool futex_key_is_private(union futex_key *key)
 {
         /*
@@ -138,19 +142,14 @@ static inline bool futex_key_is_private(union futex_key *key)
         return !(key->both.offset & (FUT_OFF_INODE | FUT_OFF_MMSHARED));
 }
 
-bool futex_private_hash_get(struct futex_private_hash *fph)
+static bool futex_private_hash_get(struct futex_private_hash *fph)
 {
-        if (fph->immutable)
-                return true;
-        return rcuref_get(&fph->users);
+        return futex_ref_get(fph);
 }
 
 void futex_private_hash_put(struct futex_private_hash *fph)
 {
-        /* Ignore return value, last put is verified via rcuref_is_dead() */
-        if (fph->immutable)
-                return;
-        if (rcuref_put(&fph->users))
+        if (futex_ref_put(fph))
                 wake_up_var(fph->mm);
 }
 
@@ -243,14 +242,18 @@ static bool __futex_pivot_hash(struct mm_struct *mm,
         fph = rcu_dereference_protected(mm->futex_phash,
                                         lockdep_is_held(&mm->futex_hash_lock));
         if (fph) {
-                if (!rcuref_is_dead(&fph->users)) {
+                if (!futex_ref_is_dead(fph)) {
                         mm->futex_phash_new = new;
                         return false;
                 }
 
                 futex_rehash_private(fph, new);
         }
-        rcu_assign_pointer(mm->futex_phash, new);
+        new->state = FR_PERCPU;
+        scoped_guard(rcu) {
+                mm->futex_batches = get_state_synchronize_rcu();
+                rcu_assign_pointer(mm->futex_phash, new);
+        }
         kvfree_rcu(fph, rcu);
         return true;
 }
@@ -289,9 +292,7 @@ again:
                 if (!fph)
                         return NULL;
 
-                if (fph->immutable)
-                        return fph;
-                if (rcuref_get(&fph->users))
+                if (futex_private_hash_get(fph))
                         return fph;
         }
         futex_pivot_hash(mm);
@@ -583,8 +584,8 @@ int get_futex_key(u32 __user *uaddr, unsigned int flags, union futex_key *key,
                 if (futex_get_value(&node, naddr))
                         return -EFAULT;
 
-                if (node != FUTEX_NO_NODE &&
-                    (node >= MAX_NUMNODES || !node_possible(node)))
+                if ((node != FUTEX_NO_NODE) &&
+                    ((unsigned int)node >= MAX_NUMNODES || !node_possible(node)))
                         return -EINVAL;
         }
 
@@ -1524,19 +1525,221 @@ static void futex_hash_bucket_init(struct futex_hash_bucket *fhb,
 }
 
 #define FH_CUSTOM       0x01
-#define FH_IMMUTABLE    0x02
 
 #ifdef CONFIG_FUTEX_PRIVATE_HASH
+
+/*
+ * futex-ref
+ *
+ * Heavily inspired by percpu-rwsem/percpu-refcount; not reusing any of that
+ * code because it just doesn't fit right.
+ *
+ * Dual counter, per-cpu / atomic approach like percpu-refcount, except it
+ * re-initializes the state automatically, such that the fph swizzle is also a
+ * transition back to per-cpu.
+ */
+
+static void futex_ref_rcu(struct rcu_head *head);
+
+static void __futex_ref_atomic_begin(struct futex_private_hash *fph)
+{
+        struct mm_struct *mm = fph->mm;
+
+        /*
+         * The counter we're about to switch to must have fully switched;
+         * otherwise it would be impossible for it to have reported success
+         * from futex_ref_is_dead().
+         */
+        WARN_ON_ONCE(atomic_long_read(&mm->futex_atomic) != 0);
+
+        /*
+         * Set the atomic to the bias value such that futex_ref_{get,put}()
+         * will never observe 0. Will be fixed up in __futex_ref_atomic_end()
+         * when folding in the percpu count.
+         */
+        atomic_long_set(&mm->futex_atomic, LONG_MAX);
+        smp_store_release(&fph->state, FR_ATOMIC);
+
+        call_rcu_hurry(&mm->futex_rcu, futex_ref_rcu);
+}
+
+static void __futex_ref_atomic_end(struct futex_private_hash *fph)
+{
+        struct mm_struct *mm = fph->mm;
+        unsigned int count = 0;
+        long ret;
+        int cpu;
+
+        /*
+         * Per __futex_ref_atomic_begin() the state of the fph must be ATOMIC
+         * and per this RCU callback, everybody must now observe this state and
+         * use the atomic variable.
+         */
+        WARN_ON_ONCE(fph->state != FR_ATOMIC);
+
+        /*
+         * Therefore the per-cpu counter is now stable, sum and reset.
+         */
+        for_each_possible_cpu(cpu) {
+                unsigned int *ptr = per_cpu_ptr(mm->futex_ref, cpu);
+                count += *ptr;
+                *ptr = 0;
+        }
+
+        /*
+         * Re-init for the next cycle.
+         */
+        this_cpu_inc(*mm->futex_ref); /* 0 -> 1 */
+
+        /*
+         * Add actual count, subtract bias and initial refcount.
+         *
+         * The moment this atomic operation happens, futex_ref_is_dead() can
+         * become true.
+         */
+        ret = atomic_long_add_return(count - LONG_MAX - 1, &mm->futex_atomic);
+        if (!ret)
+                wake_up_var(mm);
+
+        WARN_ON_ONCE(ret < 0);
+        mmput_async(mm);
+}
+
+static void futex_ref_rcu(struct rcu_head *head)
+{
+        struct mm_struct *mm = container_of(head, struct mm_struct, futex_rcu);
+        struct futex_private_hash *fph = rcu_dereference_raw(mm->futex_phash);
+
+        if (fph->state == FR_PERCPU) {
+                /*
+                 * Per this extra grace-period, everybody must now observe
+                 * fph as the current fph and no previously observed fph's
+                 * are in-flight.
+                 *
+                 * Notably, nobody will now rely on the atomic
+                 * futex_ref_is_dead() state anymore so we can begin the
+                 * migration of the per-cpu counter into the atomic.
+                 */
+                __futex_ref_atomic_begin(fph);
+                return;
+        }
+
+        __futex_ref_atomic_end(fph);
+}
+
+/*
+ * Drop the initial refcount and transition to atomics.
+ */
+static void futex_ref_drop(struct futex_private_hash *fph)
+{
+        struct mm_struct *mm = fph->mm;
+
+        /*
+         * Can only transition the current fph;
+         */
+        WARN_ON_ONCE(rcu_dereference_raw(mm->futex_phash) != fph);
+        /*
+         * We enqueue at least one RCU callback. Ensure mm stays if the task
+         * exits before the transition is completed.
+         */
+        mmget(mm);
+
+        /*
+         * In order to avoid the following scenario:
+         *
+         * futex_hash()                 __futex_pivot_hash()
+         *   guard(rcu);                  guard(mm->futex_hash_lock);
+         *   fph = mm->futex_phash;
+         *                                rcu_assign_pointer(&mm->futex_phash, new);
+         *                              futex_hash_allocate()
+         *                                futex_ref_drop()
+         *                                  fph->state = FR_ATOMIC;
+         *                                  atomic_set(, BIAS);
+         *
+         *   futex_private_hash_get(fph); // OOPS
+         *
+         * Where an old fph (which is FR_ATOMIC) and should fail on
+         * inc_not_zero, will succeed because a new transition is started and
+         * the atomic is bias'ed away from 0.
+         *
+         * There must be at least one full grace-period between publishing a
+         * new fph and trying to replace it.
+         */
+        if (poll_state_synchronize_rcu(mm->futex_batches)) {
+                /*
+                 * There was a grace-period, we can begin now.
+                 */
+                __futex_ref_atomic_begin(fph);
+                return;
+        }
+
+        call_rcu_hurry(&mm->futex_rcu, futex_ref_rcu);
+}
+
+static bool futex_ref_get(struct futex_private_hash *fph)
+{
+        struct mm_struct *mm = fph->mm;
+
+        guard(rcu)();
+
+        if (smp_load_acquire(&fph->state) == FR_PERCPU) {
+                this_cpu_inc(*mm->futex_ref);
+                return true;
+        }
+
+        return atomic_long_inc_not_zero(&mm->futex_atomic);
+}
+
+static bool futex_ref_put(struct futex_private_hash *fph)
+{
+        struct mm_struct *mm = fph->mm;
+
+        guard(rcu)();
+
+        if (smp_load_acquire(&fph->state) == FR_PERCPU) {
+                this_cpu_dec(*mm->futex_ref);
+                return false;
+        }
+
+        return atomic_long_dec_and_test(&mm->futex_atomic);
+}
+
+static bool futex_ref_is_dead(struct futex_private_hash *fph)
+{
+        struct mm_struct *mm = fph->mm;
+
+        guard(rcu)();
+
+        if (smp_load_acquire(&fph->state) == FR_PERCPU)
+                return false;
+
+        return atomic_long_read(&mm->futex_atomic) == 0;
+}
+
+int futex_mm_init(struct mm_struct *mm)
+{
+        mutex_init(&mm->futex_hash_lock);
+        RCU_INIT_POINTER(mm->futex_phash, NULL);
+        mm->futex_phash_new = NULL;
+        /* futex-ref */
+        atomic_long_set(&mm->futex_atomic, 0);
+        mm->futex_batches = get_state_synchronize_rcu();
+        mm->futex_ref = alloc_percpu(unsigned int);
+        if (!mm->futex_ref)
+                return -ENOMEM;
+        this_cpu_inc(*mm->futex_ref); /* 0 -> 1 */
+        return 0;
+}
+
 void futex_hash_free(struct mm_struct *mm)
 {
         struct futex_private_hash *fph;
 
+        free_percpu(mm->futex_ref);
         kvfree(mm->futex_phash_new);
         fph = rcu_dereference_raw(mm->futex_phash);
-        if (fph) {
-                WARN_ON_ONCE(rcuref_read(&fph->users) > 1);
+        if (fph)
                 kvfree(fph);
-        }
 }
 
 static bool futex_pivot_pending(struct mm_struct *mm)
@@ -1549,7 +1752,7 @@ static bool futex_pivot_pending(struct mm_struct *mm)
                 return true;
 
         fph = rcu_dereference(mm->futex_phash);
-        return rcuref_is_dead(&fph->users);
+        return futex_ref_is_dead(fph);
 }
 
 static bool futex_hash_less(struct futex_private_hash *a,
@@ -1591,21 +1794,20 @@ static int futex_hash_allocate(unsigned int hash_slots, unsigned int flags)
          */
        scoped_guard(rcu) {
                fph = rcu_dereference(mm->futex_phash);
-               if (fph && (!fph->hash_mask || fph->immutable)) {
+               if (fph && !fph->hash_mask) {
                        if (custom)
                                return -EBUSY;
                        return 0;
                }
        }
 
-       fph = kvzalloc(struct_size(fph, queues, hash_slots), GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
+       fph = kvzalloc(struct_size(fph, queues, hash_slots),
+                      GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
        if (!fph)
                return -ENOMEM;
 
-       rcuref_init(&fph->users, 1);
        fph->hash_mask = hash_slots ? hash_slots - 1 : 0;
        fph->custom = custom;
-       fph->immutable = !!(flags & FH_IMMUTABLE);
        fph->mm = mm;
 
        for (i = 0; i < hash_slots; i++)
@@ -1629,13 +1831,23 @@ again:
                mm->futex_phash_new = NULL;
 
                if (fph) {
+                       if (cur && !cur->hash_mask) {
+                               /*
+                                * If two threads simultaneously request the global
+                                * hash then the first one performs the switch,
+                                * the second one returns here.
+                                */
+                               free = fph;
+                               mm->futex_phash_new = new;
+                               return -EBUSY;
+                       }
                        if (cur && !new) {
                                /*
                                 * If we have an existing hash, but do not yet have
                                 * allocated a replacement hash, drop the initial
                                 * reference on the existing hash.
                                 */
-                               futex_private_hash_put(cur);
+                               futex_ref_drop(cur);
                        }
 
                        if (new) {
@@ -1712,19 +1924,6 @@ static int futex_hash_get_slots(void)
        return 0;
 }
 
-static int futex_hash_get_immutable(void)
-{
-       struct futex_private_hash *fph;
-
-       guard(rcu)();
-       fph = rcu_dereference(current->mm->futex_phash);
-       if (fph && fph->immutable)
-               return 1;
-       if (fph && !fph->hash_mask)
-               return 1;
-       return 0;
-}
-
 #else
 
 static int futex_hash_allocate(unsigned int hash_slots, unsigned int flags)
@@ -1737,10 +1936,6 @@ static int futex_hash_get_slots(void)
        return 0;
 }
 
-static int futex_hash_get_immutable(void)
-{
-       return 0;
-}
 #endif
 
 int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsigned long arg4)
@@ -1750,10 +1945,8 @@ int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsigned long arg4)
 
        switch (arg2) {
        case PR_FUTEX_HASH_SET_SLOTS:
-               if (arg4 & ~FH_FLAG_IMMUTABLE)
+               if (arg4)
                        return -EINVAL;
-               if (arg4 & FH_FLAG_IMMUTABLE)
-                       flags |= FH_IMMUTABLE;
                ret = futex_hash_allocate(arg3, flags);
                break;
 
@@ -1761,10 +1954,6 @@ int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsigned long arg4)
                ret = futex_hash_get_slots();
                break;
 
-       case PR_FUTEX_HASH_GET_IMMUTABLE:
-               ret = futex_hash_get_immutable();
-               break;
-
        default:
                ret = -EINVAL;
                break;
diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h
index fcd1617212ee..c74eac572acd 100644
--- a/kernel/futex/futex.h
+++ b/kernel/futex/futex.h
@@ -228,14 +228,12 @@ extern void futex_hash_get(struct futex_hash_bucket *hb);
 extern void futex_hash_put(struct futex_hash_bucket *hb);
 
 extern struct futex_private_hash *futex_private_hash(void);
-extern bool futex_private_hash_get(struct futex_private_hash *fph);
 extern void futex_private_hash_put(struct futex_private_hash *fph);
 
 #else /* !CONFIG_FUTEX_PRIVATE_HASH */
 static inline void futex_hash_get(struct futex_hash_bucket *hb) { }
 static inline void futex_hash_put(struct futex_hash_bucket *hb) { }
 static inline struct futex_private_hash *futex_private_hash(void) { return NULL; }
-static inline bool futex_private_hash_get(void) { return false; }
 static inline void futex_private_hash_put(struct futex_private_hash *fph) { }
 #endif
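The futex-ref comment block in the patch above describes the dual per-CPU/atomic counter only in prose. The sketch below is a small, self-contained user-space model of the same idea, written with C11 atomics; struct ref, ref_get()/ref_put()/ref_is_dead()/ref_kill(), NCPU and the wait_for_readers() stub are invented for illustration and are not kernel interfaces. In the kernel, the RCU read-side guard in futex_ref_get()/futex_ref_put() and the call_rcu_hurry() callback chain provide the ordering that the stub merely stands in for, and __futex_ref_atomic_end() additionally re-arms the per-CPU counter for the next hash generation, which this model omits.

#include <limits.h>
#include <stdatomic.h>
#include <stdbool.h>

#define NCPU 4                          /* assumption: fixed CPU count for the model */

enum ref_state { REF_PERCPU, REF_ATOMIC };

struct ref {
        _Atomic int state;              /* REF_PERCPU or REF_ATOMIC */
        long percpu[NCPU];              /* fast-path counts; each slot used by one thread only */
        atomic_long atomic;             /* authoritative count once the fold has happened */
};

/*
 * Stand-in for an RCU grace period: wait until no thread can still be on the
 * fast path with the old state. The kernel uses call_rcu()/synchronize_rcu();
 * here it is a no-op because the demo in main() is single-threaded.
 */
static void wait_for_readers(void) { }

static bool ref_get(struct ref *r, int cpu)
{
        if (atomic_load_explicit(&r->state, memory_order_acquire) == REF_PERCPU) {
                r->percpu[cpu]++;       /* fast path: no shared cacheline, can never see "dead" */
                return true;
        }
        /* Slow path: an inc_not_zero on the folded atomic count. */
        long old = atomic_load_explicit(&r->atomic, memory_order_relaxed);
        while (old != 0) {
                if (atomic_compare_exchange_weak(&r->atomic, &old, old + 1))
                        return true;
        }
        return false;                   /* count already hit zero: the object is dead */
}

static bool ref_put(struct ref *r, int cpu)
{
        if (atomic_load_explicit(&r->state, memory_order_acquire) == REF_PERCPU) {
                r->percpu[cpu]--;       /* may go negative; only the sum matters */
                return false;           /* never the final put while in per-CPU mode */
        }
        return atomic_fetch_sub(&r->atomic, 1) == 1;    /* true when the count reaches zero */
}

static bool ref_is_dead(struct ref *r)
{
        if (atomic_load_explicit(&r->state, memory_order_acquire) == REF_PERCPU)
                return false;           /* per-CPU mode can never report dead */
        return atomic_load(&r->atomic) == 0;
}

/*
 * Drop the initial reference and switch to atomic mode, mirroring
 * futex_ref_drop()/__futex_ref_atomic_{begin,end}(): bias the atomic so the
 * slow path never observes zero, flip the state, wait out the fast-path
 * readers, then fold the per-CPU sum in and remove bias + initial reference.
 */
static bool ref_kill(struct ref *r)
{
        long sum = 0;

        atomic_store(&r->atomic, LONG_MAX);             /* bias */
        atomic_store_explicit(&r->state, REF_ATOMIC, memory_order_release);

        wait_for_readers();             /* kernel: call_rcu_hurry() -> futex_ref_rcu() */

        for (int cpu = 0; cpu < NCPU; cpu++) {
                sum += r->percpu[cpu];
                r->percpu[cpu] = 0;
        }
        /* Fold in the per-CPU sum, drop the bias and the initial reference. */
        long delta = sum - LONG_MAX - 1;
        return atomic_fetch_add(&r->atomic, delta) + delta == 0;
}

int main(void)
{
        struct ref r = { .state = REF_PERCPU, .atomic = 0 };

        r.percpu[0] = 1;                /* initial reference, as in futex_mm_init() */
        ref_get(&r, 1);                 /* a reader takes and drops a reference */
        ref_put(&r, 1);

        bool dead = ref_kill(&r);       /* drop the initial ref, fold the counters */
        return (dead && ref_is_dead(&r)) ? 0 : 1;
}

The LONG_MAX bias is the central trick: while the per-CPU counts are still being folded in, the slow-path inc_not_zero can never observe zero, so the object can neither be revived from nor prematurely declared dead. Only the final fold (atomic_long_add_return() in the kernel, the fetch_add in ref_kill() above) can expose zero, which is exactly the point at which futex_ref_is_dead() may report the private hash as dead and __futex_pivot_hash() may rehash and free it.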