Diffstat (limited to 'lib/rhashtable.c')
-rw-r--r-- | lib/rhashtable.c | 164
1 file changed, 87 insertions(+), 77 deletions(-)
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index fc0d451279f0..5f8fe3e88219 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -27,17 +27,12 @@
 #include <linux/err.h>
 
 #define HASH_DEFAULT_SIZE	64UL
-#define HASH_MIN_SIZE		4UL
+#define HASH_MIN_SIZE		4U
 #define BUCKET_LOCKS_PER_CPU	128UL
 
 /* Base bits plus 1 bit for nulls marker */
 #define HASH_RESERVED_SPACE	(RHT_BASE_BITS + 1)
 
-enum {
-	RHT_LOCK_NORMAL,
-	RHT_LOCK_NESTED,
-};
-
 /* The bucket lock is selected based on the hash and protects mutations
  * on a group of hash buckets.
  *
@@ -146,8 +141,13 @@ static void bucket_table_free(const struct bucket_table *tbl)
 	kvfree(tbl);
 }
 
+static void bucket_table_free_rcu(struct rcu_head *head)
+{
+	bucket_table_free(container_of(head, struct bucket_table, rcu));
+}
+
 static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
-					       size_t nbuckets, u32 hash_rnd)
+					       size_t nbuckets)
 {
 	struct bucket_table *tbl = NULL;
 	size_t size;
@@ -162,14 +162,16 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
 		return NULL;
 
 	tbl->size = nbuckets;
-	tbl->shift = ilog2(nbuckets);
-	tbl->hash_rnd = hash_rnd;
 
 	if (alloc_bucket_locks(ht, tbl) < 0) {
 		bucket_table_free(tbl);
 		return NULL;
 	}
 
+	INIT_LIST_HEAD(&tbl->walkers);
+
+	get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
+
 	for (i = 0; i < nbuckets; i++)
 		INIT_RHT_NULLS_HEAD(tbl->buckets[i], ht, i);
 
@@ -186,7 +188,7 @@ static bool rht_grow_above_75(const struct rhashtable *ht,
 {
 	/* Expand table when exceeding 75% load */
 	return atomic_read(&ht->nelems) > (tbl->size / 4 * 3) &&
-	       (!ht->p.max_shift || tbl->shift < ht->p.max_shift);
+	       (!ht->p.max_size || tbl->size < ht->p.max_size);
 }
 
 /**
@@ -199,13 +201,14 @@ static bool rht_shrink_below_30(const struct rhashtable *ht,
 {
 	/* Shrink table beneath 30% load */
 	return atomic_read(&ht->nelems) < (tbl->size * 3 / 10) &&
-	       tbl->shift > ht->p.min_shift;
+	       tbl->size > ht->p.min_size;
 }
 
 static int rhashtable_rehash_one(struct rhashtable *ht, unsigned old_hash)
 {
-	struct bucket_table *new_tbl = rht_dereference(ht->future_tbl, ht);
 	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
+	struct bucket_table *new_tbl =
+		rht_dereference(old_tbl->future_tbl, ht) ?: old_tbl;
 	struct rhash_head __rcu **pprev = &old_tbl->buckets[old_hash];
 	int err = -ENOENT;
 	struct rhash_head *head, *next, *entry;
@@ -229,7 +232,7 @@ static int rhashtable_rehash_one(struct rhashtable *ht, unsigned old_hash)
 
 	new_bucket_lock = bucket_lock(new_tbl, new_hash);
 
-	spin_lock_nested(new_bucket_lock, RHT_LOCK_NESTED);
+	spin_lock_nested(new_bucket_lock, SINGLE_DEPTH_NESTING);
 	head = rht_dereference_bucket(new_tbl->buckets[new_hash],
 				      new_tbl, new_hash);
 
@@ -257,6 +260,7 @@ static void rhashtable_rehash_chain(struct rhashtable *ht, unsigned old_hash)
 	spin_lock_bh(old_bucket_lock);
 	while (!rhashtable_rehash_one(ht, old_hash))
 		;
+	old_tbl->rehash++;
 	spin_unlock_bh(old_bucket_lock);
 }
 
@@ -264,16 +268,13 @@ static void rhashtable_rehash(struct rhashtable *ht,
 			      struct bucket_table *new_tbl)
 {
 	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
+	struct rhashtable_walker *walker;
 	unsigned old_hash;
 
-	get_random_bytes(&new_tbl->hash_rnd, sizeof(new_tbl->hash_rnd));
-
 	/* Make insertions go into the new, empty table right away. Deletions
 	 * and lookups will be attempted in both tables until we synchronize.
-	 * The synchronize_rcu() guarantees for the new table to be picked up
-	 * so no new additions go into the old table while we relink.
 	 */
-	rcu_assign_pointer(ht->future_tbl, new_tbl);
+	rcu_assign_pointer(old_tbl->future_tbl, new_tbl);
 
 	/* Ensure the new table is visible to readers. */
 	smp_wmb();
@@ -284,13 +285,14 @@ static void rhashtable_rehash(struct rhashtable *ht,
 	/* Publish the new table pointer. */
 	rcu_assign_pointer(ht->tbl, new_tbl);
 
+	list_for_each_entry(walker, &old_tbl->walkers, list)
+		walker->tbl = NULL;
+
 	/* Wait for readers. All new readers will see the new
 	 * table, and thus no references to the old table will
 	 * remain.
 	 */
-	synchronize_rcu();
-
-	bucket_table_free(old_tbl);
+	call_rcu(&old_tbl->rcu, bucket_table_free_rcu);
 }
 
 /**
@@ -314,7 +316,7 @@ int rhashtable_expand(struct rhashtable *ht)
 
 	ASSERT_RHT_MUTEX(ht);
 
-	new_tbl = bucket_table_alloc(ht, old_tbl->size * 2, old_tbl->hash_rnd);
+	new_tbl = bucket_table_alloc(ht, old_tbl->size * 2);
 	if (new_tbl == NULL)
 		return -ENOMEM;
 
@@ -345,7 +347,7 @@ int rhashtable_shrink(struct rhashtable *ht)
 
 	ASSERT_RHT_MUTEX(ht);
 
-	new_tbl = bucket_table_alloc(ht, old_tbl->size / 2, old_tbl->hash_rnd);
+	new_tbl = bucket_table_alloc(ht, old_tbl->size / 2);
 	if (new_tbl == NULL)
 		return -ENOMEM;
 
@@ -358,7 +360,6 @@ static void rht_deferred_worker(struct work_struct *work)
 {
 	struct rhashtable *ht;
 	struct bucket_table *tbl;
-	struct rhashtable_walker *walker;
 
 	ht = container_of(work, struct rhashtable, run_work);
 	mutex_lock(&ht->mutex);
@@ -367,9 +368,6 @@ static void rht_deferred_worker(struct work_struct *work)
 
 	tbl = rht_dereference(ht->tbl, ht);
 
-	list_for_each_entry(walker, &ht->walkers, list)
-		walker->resize = true;
-
 	if (rht_grow_above_75(ht, tbl))
 		rhashtable_expand(ht);
 	else if (rht_shrink_below_30(ht, tbl))
@@ -385,14 +383,16 @@ static bool __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj,
 	struct rhash_head *head;
 	bool no_resize_running;
 	unsigned hash;
+	spinlock_t *old_lock;
 	bool success = true;
 
 	rcu_read_lock();
 
 	old_tbl = rht_dereference_rcu(ht->tbl, ht);
 	hash = head_hashfn(ht, old_tbl, obj);
+	old_lock = bucket_lock(old_tbl, hash);
 
-	spin_lock_bh(bucket_lock(old_tbl, hash));
+	spin_lock_bh(old_lock);
 
 	/* Because we have already taken the bucket lock in old_tbl,
 	 * if we find that future_tbl is not yet visible then that
@@ -400,10 +400,10 @@ static bool __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj,
 	 * also grab the bucket lock in old_tbl because until the
 	 * rehash completes ht->tbl won't be changed.
 	 */
-	tbl = rht_dereference_rcu(ht->future_tbl, ht);
+	tbl = rht_dereference_rcu(old_tbl->future_tbl, ht) ?: old_tbl;
 	if (tbl != old_tbl) {
 		hash = head_hashfn(ht, tbl, obj);
-		spin_lock_nested(bucket_lock(tbl, hash), RHT_LOCK_NESTED);
+		spin_lock_nested(bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING);
 	}
 
 	if (compare &&
@@ -429,13 +429,10 @@ static bool __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj,
 		schedule_work(&ht->run_work);
 
 exit:
-	if (tbl != old_tbl) {
-		hash = head_hashfn(ht, tbl, obj);
+	if (tbl != old_tbl)
 		spin_unlock(bucket_lock(tbl, hash));
-	}
 
-	hash = head_hashfn(ht, old_tbl, obj);
-	spin_unlock_bh(bucket_lock(old_tbl, hash));
+	spin_unlock_bh(old_lock);
 
 	rcu_read_unlock();
 
@@ -512,28 +509,25 @@ static bool __rhashtable_remove(struct rhashtable *ht,
  */
 bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj)
 {
-	struct bucket_table *tbl, *old_tbl;
+	struct bucket_table *tbl;
 	bool ret;
 
 	rcu_read_lock();
 
-	old_tbl = rht_dereference_rcu(ht->tbl, ht);
-	ret = __rhashtable_remove(ht, old_tbl, obj);
+	tbl = rht_dereference_rcu(ht->tbl, ht);
 
 	/* Because we have already taken (and released) the bucket
 	 * lock in old_tbl, if we find that future_tbl is not yet
 	 * visible then that guarantees the entry to still be in
-	 * old_tbl if it exists.
+	 * the old tbl if it exists.
 	 */
-	tbl = rht_dereference_rcu(ht->future_tbl, ht);
-	if (!ret && old_tbl != tbl)
-		ret = __rhashtable_remove(ht, tbl, obj);
+	while (!(ret = __rhashtable_remove(ht, tbl, obj)) &&
+	       (tbl = rht_dereference_rcu(tbl->future_tbl, ht)))
+		;
 
 	if (ret) {
-		bool no_resize_running = tbl == old_tbl;
-
 		atomic_dec(&ht->nelems);
-		if (no_resize_running && rht_shrink_below_30(ht, tbl))
+		if (rht_shrink_below_30(ht, tbl))
 			schedule_work(&ht->run_work);
 	}
 
@@ -599,7 +593,7 @@ EXPORT_SYMBOL_GPL(rhashtable_lookup);
 void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key,
 				bool (*compare)(void *, void *), void *arg)
 {
-	const struct bucket_table *tbl, *old_tbl;
+	const struct bucket_table *tbl;
 	struct rhash_head *he;
 	u32 hash;
 
@@ -618,9 +612,8 @@ restart:
 	/* Ensure we see any new tables. */
 	smp_rmb();
 
-	old_tbl = tbl;
-	tbl = rht_dereference_rcu(ht->future_tbl, ht);
-	if (unlikely(tbl != old_tbl))
+	tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+	if (unlikely(tbl))
 		goto restart;
 
 	rcu_read_unlock();
@@ -725,11 +718,9 @@ int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter)
 	if (!iter->walker)
 		return -ENOMEM;
 
-	INIT_LIST_HEAD(&iter->walker->list);
-	iter->walker->resize = false;
-
 	mutex_lock(&ht->mutex);
-	list_add(&iter->walker->list, &ht->walkers);
+	iter->walker->tbl = rht_dereference(ht->tbl, ht);
+	list_add(&iter->walker->list, &iter->walker->tbl->walkers);
 	mutex_unlock(&ht->mutex);
 
 	return 0;
@@ -745,7 +736,8 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_init);
 void rhashtable_walk_exit(struct rhashtable_iter *iter)
 {
 	mutex_lock(&iter->ht->mutex);
-	list_del(&iter->walker->list);
+	if (iter->walker->tbl)
+		list_del(&iter->walker->list);
 	mutex_unlock(&iter->ht->mutex);
 	kfree(iter->walker);
 }
@@ -766,13 +758,21 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_exit);
  * by calling rhashtable_walk_next.
  */
 int rhashtable_walk_start(struct rhashtable_iter *iter)
+	__acquires(RCU)
 {
+	struct rhashtable *ht = iter->ht;
+
+	mutex_lock(&ht->mutex);
+
+	if (iter->walker->tbl)
+		list_del(&iter->walker->list);
+
 	rcu_read_lock();
 
-	if (iter->walker->resize) {
-		iter->slot = 0;
-		iter->skip = 0;
-		iter->walker->resize = false;
+	mutex_unlock(&ht->mutex);
+
+	if (!iter->walker->tbl) {
+		iter->walker->tbl = rht_dereference_rcu(ht->tbl, ht);
 		return -EAGAIN;
 	}
 
@@ -794,13 +794,11 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_start);
  */
 void *rhashtable_walk_next(struct rhashtable_iter *iter)
 {
-	const struct bucket_table *tbl;
+	struct bucket_table *tbl = iter->walker->tbl;
 	struct rhashtable *ht = iter->ht;
 	struct rhash_head *p = iter->p;
 	void *obj = NULL;
 
-	tbl = rht_dereference_rcu(ht->tbl, ht);
-
 	if (p) {
 		p = rht_dereference_bucket_rcu(p->next, tbl, iter->slot);
 		goto next;
@@ -826,17 +824,17 @@ next:
 		iter->skip = 0;
 	}
 
-	iter->p = NULL;
-
-out:
-	if (iter->walker->resize) {
-		iter->p = NULL;
+	iter->walker->tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+	if (iter->walker->tbl) {
 		iter->slot = 0;
 		iter->skip = 0;
-		iter->walker->resize = false;
 		return ERR_PTR(-EAGAIN);
 	}
 
+	iter->p = NULL;
+
+out:
+
 	return obj;
 }
 EXPORT_SYMBOL_GPL(rhashtable_walk_next);
@@ -848,16 +846,34 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_next);
  * Finish a hash table walk.
 */
 void rhashtable_walk_stop(struct rhashtable_iter *iter)
+	__releases(RCU)
 {
-	rcu_read_unlock();
+	struct rhashtable *ht;
+	struct bucket_table *tbl = iter->walker->tbl;
+
+	if (!tbl)
+		goto out;
+
+	ht = iter->ht;
+
+	mutex_lock(&ht->mutex);
+	if (tbl->rehash < tbl->size)
+		list_add(&iter->walker->list, &tbl->walkers);
+	else
+		iter->walker->tbl = NULL;
+	mutex_unlock(&ht->mutex);
+
 	iter->p = NULL;
+
+out:
+	rcu_read_unlock();
 }
 EXPORT_SYMBOL_GPL(rhashtable_walk_stop);
 
 static size_t rounded_hashtable_size(struct rhashtable_params *params)
 {
 	return max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
-		   1UL << params->min_shift);
+		   (unsigned long)params->min_size);
 }
 
 /**
@@ -907,7 +923,6 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params)
 {
 	struct bucket_table *tbl;
 	size_t size;
-	u32 hash_rnd;
 
 	size = HASH_DEFAULT_SIZE;
 
@@ -918,8 +933,7 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params)
 	if (params->nulls_base && params->nulls_base < (1U << RHT_BASE_SHIFT))
 		return -EINVAL;
 
-	params->min_shift = max_t(size_t, params->min_shift,
-				  ilog2(HASH_MIN_SIZE));
+	params->min_size = max(params->min_size, HASH_MIN_SIZE);
 
 	if (params->nelem_hint)
 		size = rounded_hashtable_size(params);
@@ -927,23 +941,19 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params)
 	memset(ht, 0, sizeof(*ht));
 	mutex_init(&ht->mutex);
 	memcpy(&ht->p, params, sizeof(*params));
-	INIT_LIST_HEAD(&ht->walkers);
 
 	if (params->locks_mul)
 		ht->p.locks_mul = roundup_pow_of_two(params->locks_mul);
 	else
 		ht->p.locks_mul = BUCKET_LOCKS_PER_CPU;
 
-	get_random_bytes(&hash_rnd, sizeof(hash_rnd));
-
-	tbl = bucket_table_alloc(ht, size, hash_rnd);
+	tbl = bucket_table_alloc(ht, size);
 	if (tbl == NULL)
 		return -ENOMEM;
 
 	atomic_set(&ht->nelems, 0);
 
 	RCU_INIT_POINTER(ht->tbl, tbl);
-	RCU_INIT_POINTER(ht->future_tbl, tbl);
 
 	INIT_WORK(&ht->run_work, rht_deferred_worker);
 
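The reworked walker API above signals a concurrent rehash with -EAGAIN: rhashtable_walk_start() returns it when the table changed while the walker was parked, and rhashtable_walk_next() returns ERR_PTR(-EAGAIN) when the iterator moves over to the future table, in which case entries may be visited again but none should be lost. Below is a minimal caller sketch against that API; the object type, its node member and dump_all() are illustrative assumptions and not part of this patch.

/* Illustrative sketch only (not part of this patch): walking an rhashtable
 * with the iterator API as reworked above.  struct my_obj and dump_all()
 * are hypothetical.
 */
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/rhashtable.h>

struct my_obj {
	u32 key;
	struct rhash_head node;		/* linked into the hash table */
};

static int dump_all(struct rhashtable *ht)
{
	struct rhashtable_iter iter;
	struct my_obj *obj;
	int err;

	err = rhashtable_walk_init(ht, &iter);
	if (err)
		return err;

	/* walk_start takes the RCU read lock even on error, so walk_stop
	 * must always be called afterwards.
	 */
	err = rhashtable_walk_start(&iter);
	if (err == -EAGAIN)		/* table was rehashed while parked */
		err = 0;

	while (!err && (obj = rhashtable_walk_next(&iter)) != NULL) {
		if (IS_ERR(obj)) {
			err = PTR_ERR(obj);
			if (err == -EAGAIN) {
				/* Rehash in progress: the iterator switched
				 * to the new table; just keep going.
				 */
				err = 0;
				continue;
			}
			break;
		}
		pr_info("key %u\n", obj->key);
	}

	rhashtable_walk_stop(&iter);
	rhashtable_walk_exit(&iter);
	return err;
}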