diff options
Diffstat (limited to 'include/linux/rhashtable.h')
| -rw-r--r-- | include/linux/rhashtable.h | 752 |
1 files changed, 397 insertions, 355 deletions
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 7d56a7ea2b2e..08e664b21f5a 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Resizable, Scalable, Concurrent Hash Table * @@ -17,37 +18,33 @@ #ifndef _LINUX_RHASHTABLE_H #define _LINUX_RHASHTABLE_H -#include <linux/atomic.h> -#include <linux/compiler.h> #include <linux/err.h> #include <linux/errno.h> #include <linux/jhash.h> #include <linux/list_nulls.h> #include <linux/workqueue.h> -#include <linux/mutex.h> #include <linux/rculist.h> +#include <linux/bit_spinlock.h> +#include <linux/rhashtable-types.h> /* + * Objects in an rhashtable have an embedded struct rhash_head + * which is linked into as hash chain from the hash table - or one + * of two or more hash tables when the rhashtable is being resized. * The end of the chain is marked with a special nulls marks which has - * the following format: - * - * +-------+-----------------------------------------------------+-+ - * | Base | Hash |1| - * +-------+-----------------------------------------------------+-+ - * - * Base (4 bits) : Reserved to distinguish between multiple tables. - * Specified via &struct rhashtable_params.nulls_base. - * Hash (27 bits): Full hash (unmasked) of first element added to bucket - * 1 (1 bit) : Nulls marker (always set) - * - * The remaining bits of the next pointer remain unused for now. + * the least significant bit set but otherwise stores the address of + * the hash bucket. This allows us to be sure we've found the end + * of the right list. + * The value stored in the hash bucket has BIT(0) used as a lock bit. + * This bit must be atomically set before any changes are made to + * the chain. To avoid dereferencing this pointer without clearing + * the bit first, we use an opaque 'struct rhash_lock_head *' for the + * pointer stored in the bucket. This struct needs to be defined so + * that rcu_dereference() works on it, but it has no content so a + * cast is needed for it to be useful. This ensures it isn't + * used by mistake with clearing the lock bit first. */ -#define RHT_BASE_BITS 4 -#define RHT_HASH_BITS 27 -#define RHT_BASE_SHIFT RHT_HASH_BITS - -/* Base bits plus 1 bit for nulls marker */ -#define RHT_HASH_RESERVED_SPACE (RHT_BASE_BITS + 1) +struct rhash_lock_head {}; /* Maximum chain length before rehash * @@ -64,23 +61,12 @@ */ #define RHT_ELASTICITY 16u -struct rhash_head { - struct rhash_head __rcu *next; -}; - -struct rhlist_head { - struct rhash_head rhead; - struct rhlist_head __rcu *next; -}; - /** * struct bucket_table - Table of hash buckets * @size: Number of hash buckets * @nest: Number of bits of first-level nested table. * @rehash: Current bucket being rehashed * @hash_rnd: Random seed to fold into hash - * @locks_mask: Mask to apply before accessing locks[] - * @locks: Array of spinlocks protecting individual buckets * @walkers: List of active walkers * @rcu: RCU structure for freeing the table * @future_tbl: Table under construction during rehashing @@ -90,143 +76,40 @@ struct rhlist_head { struct bucket_table { unsigned int size; unsigned int nest; - unsigned int rehash; u32 hash_rnd; - unsigned int locks_mask; - spinlock_t *locks; struct list_head walkers; struct rcu_head rcu; struct bucket_table __rcu *future_tbl; - struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp; -}; + struct lockdep_map dep_map; -/** - * struct rhashtable_compare_arg - Key for the function rhashtable_compare - * @ht: Hash table - * @key: Key to compare against - */ -struct rhashtable_compare_arg { - struct rhashtable *ht; - const void *key; + struct rhash_lock_head __rcu *buckets[] ____cacheline_aligned_in_smp; }; -typedef u32 (*rht_hashfn_t)(const void *data, u32 len, u32 seed); -typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 len, u32 seed); -typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg, - const void *obj); - -struct rhashtable; - -/** - * struct rhashtable_params - Hash table construction parameters - * @nelem_hint: Hint on number of elements, should be 75% of desired size - * @key_len: Length of key - * @key_offset: Offset of key in struct to be hashed - * @head_offset: Offset of rhash_head in struct to be hashed - * @max_size: Maximum size while expanding - * @min_size: Minimum size while shrinking - * @locks_mul: Number of bucket locks to allocate per cpu (default: 128) - * @automatic_shrinking: Enable automatic shrinking of tables - * @nulls_base: Base value to generate nulls marker - * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash) - * @obj_hashfn: Function to hash object - * @obj_cmpfn: Function to compare key with object - */ -struct rhashtable_params { - u16 nelem_hint; - u16 key_len; - u16 key_offset; - u16 head_offset; - unsigned int max_size; - u16 min_size; - bool automatic_shrinking; - u8 locks_mul; - u32 nulls_base; - rht_hashfn_t hashfn; - rht_obj_hashfn_t obj_hashfn; - rht_obj_cmpfn_t obj_cmpfn; -}; - -/** - * struct rhashtable - Hash table handle - * @tbl: Bucket table - * @nelems: Number of elements in table - * @key_len: Key length for hashfn - * @p: Configuration parameters - * @max_elems: Maximum number of elements in table - * @rhlist: True if this is an rhltable - * @run_work: Deferred worker to expand/shrink asynchronously - * @mutex: Mutex to protect current/future table swapping - * @lock: Spin lock to protect walker list - */ -struct rhashtable { - struct bucket_table __rcu *tbl; - atomic_t nelems; - unsigned int key_len; - struct rhashtable_params p; - unsigned int max_elems; - bool rhlist; - struct work_struct run_work; - struct mutex mutex; - spinlock_t lock; -}; - -/** - * struct rhltable - Hash table with duplicate objects in a list - * @ht: Underlying rhtable - */ -struct rhltable { - struct rhashtable ht; -}; - -/** - * struct rhashtable_walker - Hash table walker - * @list: List entry on list of walkers - * @tbl: The table that we were walking over - */ -struct rhashtable_walker { - struct list_head list; - struct bucket_table *tbl; -}; - -/** - * struct rhashtable_iter - Hash table iterator - * @ht: Table to iterate through - * @p: Current pointer - * @list: Current hash list pointer - * @walker: Associated rhashtable walker - * @slot: Current slot - * @skip: Number of entries to skip in slot +/* + * NULLS_MARKER() expects a hash value with the low + * bits mostly likely to be significant, and it discards + * the msb. + * We give it an address, in which the bottom bit is + * always 0, and the msb might be significant. + * So we shift the address down one bit to align with + * expectations and avoid losing a significant bit. + * + * We never store the NULLS_MARKER in the hash table + * itself as we need the lsb for locking. + * Instead we store a NULL */ -struct rhashtable_iter { - struct rhashtable *ht; - struct rhash_head *p; - struct rhlist_head *list; - struct rhashtable_walker walker; - unsigned int slot; - unsigned int skip; -}; - -static inline unsigned long rht_marker(const struct rhashtable *ht, u32 hash) -{ - return NULLS_MARKER(ht->p.nulls_base + hash); -} - -#define INIT_RHT_NULLS_HEAD(ptr, ht, hash) \ - ((ptr) = (typeof(ptr)) rht_marker(ht, hash)) +#define RHT_NULLS_MARKER(ptr) \ + ((void *)NULLS_MARKER(((unsigned long) (ptr)) >> 1)) +#define INIT_RHT_NULLS_HEAD(ptr) \ + ((ptr) = NULL) static inline bool rht_is_a_nulls(const struct rhash_head *ptr) { return ((unsigned long) ptr & 1); } -static inline unsigned long rht_get_nulls_value(const struct rhash_head *ptr) -{ - return ((unsigned long) ptr) >> 1; -} - static inline void *rht_obj(const struct rhashtable *ht, const struct rhash_head *he) { @@ -236,41 +119,49 @@ static inline void *rht_obj(const struct rhashtable *ht, static inline unsigned int rht_bucket_index(const struct bucket_table *tbl, unsigned int hash) { - return (hash >> RHT_HASH_RESERVED_SPACE) & (tbl->size - 1); + return hash & (tbl->size - 1); } -static inline unsigned int rht_key_hashfn( - struct rhashtable *ht, const struct bucket_table *tbl, - const void *key, const struct rhashtable_params params) +static __always_inline unsigned int rht_key_get_hash(struct rhashtable *ht, + const void *key, const struct rhashtable_params params, + unsigned int hash_rnd) { unsigned int hash; /* params must be equal to ht->p if it isn't constant. */ if (!__builtin_constant_p(params.key_len)) - hash = ht->p.hashfn(key, ht->key_len, tbl->hash_rnd); + hash = ht->p.hashfn(key, ht->key_len, hash_rnd); else if (params.key_len) { unsigned int key_len = params.key_len; if (params.hashfn) - hash = params.hashfn(key, key_len, tbl->hash_rnd); + hash = params.hashfn(key, key_len, hash_rnd); else if (key_len & (sizeof(u32) - 1)) - hash = jhash(key, key_len, tbl->hash_rnd); + hash = jhash(key, key_len, hash_rnd); else - hash = jhash2(key, key_len / sizeof(u32), - tbl->hash_rnd); + hash = jhash2(key, key_len / sizeof(u32), hash_rnd); } else { unsigned int key_len = ht->p.key_len; if (params.hashfn) - hash = params.hashfn(key, key_len, tbl->hash_rnd); + hash = params.hashfn(key, key_len, hash_rnd); else - hash = jhash(key, key_len, tbl->hash_rnd); + hash = jhash(key, key_len, hash_rnd); } + return hash; +} + +static __always_inline unsigned int rht_key_hashfn( + struct rhashtable *ht, const struct bucket_table *tbl, + const void *key, const struct rhashtable_params params) +{ + unsigned int hash = rht_key_get_hash(ht, key, params, tbl->hash_rnd); + return rht_bucket_index(tbl, hash); } -static inline unsigned int rht_head_hashfn( +static __always_inline unsigned int rht_head_hashfn( struct rhashtable *ht, const struct bucket_table *tbl, const struct rhash_head *he, const struct rhashtable_params params) { @@ -332,25 +223,6 @@ static inline bool rht_grow_above_max(const struct rhashtable *ht, return atomic_read(&ht->nelems) >= ht->max_elems; } -/* The bucket lock is selected based on the hash and protects mutations - * on a group of hash buckets. - * - * A maximum of tbl->size/2 bucket locks is allocated. This ensures that - * a single lock always covers both buckets which may both contains - * entries which link to the same bucket of the old table during resizing. - * This allows to simplify the locking as locking the bucket in both - * tables during resize always guarantee protection. - * - * IMPORTANT: When holding the bucket lock of both the old and new table - * during expansions and shrinking, the old bucket lock must always be - * acquired first. - */ -static inline spinlock_t *rht_bucket_lock(const struct bucket_table *tbl, - unsigned int hash) -{ - return &tbl->locks[hash & tbl->locks_mask]; -} - #ifdef CONFIG_PROVE_LOCKING int lockdep_rht_mutex_is_held(struct rhashtable *ht); int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash); @@ -367,19 +239,21 @@ static inline int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, } #endif /* CONFIG_PROVE_LOCKING */ -int rhashtable_init(struct rhashtable *ht, - const struct rhashtable_params *params); -int rhltable_init(struct rhltable *hlt, - const struct rhashtable_params *params); - void *rhashtable_insert_slow(struct rhashtable *ht, const void *key, struct rhash_head *obj); void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter); void rhashtable_walk_exit(struct rhashtable_iter *iter); -int rhashtable_walk_start(struct rhashtable_iter *iter) __acquires(RCU); +int rhashtable_walk_start_check(struct rhashtable_iter *iter) __acquires(RCU); + +static inline void rhashtable_walk_start(struct rhashtable_iter *iter) +{ + (void)rhashtable_walk_start_check(iter); +} + void *rhashtable_walk_next(struct rhashtable_iter *iter); +void *rhashtable_walk_peek(struct rhashtable_iter *iter); void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases(RCU); void rhashtable_free_and_destroy(struct rhashtable *ht, @@ -387,58 +261,189 @@ void rhashtable_free_and_destroy(struct rhashtable *ht, void *arg); void rhashtable_destroy(struct rhashtable *ht); -struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl, - unsigned int hash); -struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht, - struct bucket_table *tbl, - unsigned int hash); +struct rhash_lock_head __rcu **rht_bucket_nested( + const struct bucket_table *tbl, unsigned int hash); +struct rhash_lock_head __rcu **__rht_bucket_nested( + const struct bucket_table *tbl, unsigned int hash); +struct rhash_lock_head __rcu **rht_bucket_nested_insert( + struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash); #define rht_dereference(p, ht) \ rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht)) #define rht_dereference_rcu(p, ht) \ - rcu_dereference_check(p, lockdep_rht_mutex_is_held(ht)) + rcu_dereference_all_check(p, lockdep_rht_mutex_is_held(ht)) #define rht_dereference_bucket(p, tbl, hash) \ rcu_dereference_protected(p, lockdep_rht_bucket_is_held(tbl, hash)) #define rht_dereference_bucket_rcu(p, tbl, hash) \ - rcu_dereference_check(p, lockdep_rht_bucket_is_held(tbl, hash)) + rcu_dereference_all_check(p, lockdep_rht_bucket_is_held(tbl, hash)) #define rht_entry(tpos, pos, member) \ ({ tpos = container_of(pos, typeof(*tpos), member); 1; }) -static inline struct rhash_head __rcu *const *rht_bucket( +static inline struct rhash_lock_head __rcu *const *rht_bucket( const struct bucket_table *tbl, unsigned int hash) { return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) : &tbl->buckets[hash]; } -static inline struct rhash_head __rcu **rht_bucket_var( +static inline struct rhash_lock_head __rcu **rht_bucket_var( struct bucket_table *tbl, unsigned int hash) { - return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) : + return unlikely(tbl->nest) ? __rht_bucket_nested(tbl, hash) : &tbl->buckets[hash]; } -static inline struct rhash_head __rcu **rht_bucket_insert( +static inline struct rhash_lock_head __rcu **rht_bucket_insert( struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash) { return unlikely(tbl->nest) ? rht_bucket_nested_insert(ht, tbl, hash) : &tbl->buckets[hash]; } +/* + * We lock a bucket by setting BIT(0) in the pointer - this is always + * zero in real pointers. The NULLS mark is never stored in the bucket, + * rather we store NULL if the bucket is empty. + * bit_spin_locks do not handle contention well, but the whole point + * of the hashtable design is to achieve minimum per-bucket contention. + * A nested hash table might not have a bucket pointer. In that case + * we cannot get a lock. For remove and replace the bucket cannot be + * interesting and doesn't need locking. + * For insert we allocate the bucket if this is the last bucket_table, + * and then take the lock. + * Sometimes we unlock a bucket by writing a new pointer there. In that + * case we don't need to unlock, but we do need to reset state such as + * local_bh. For that we have rht_assign_unlock(). As rcu_assign_pointer() + * provides the same release semantics that bit_spin_unlock() provides, + * this is safe. + * When we write to a bucket without unlocking, we use rht_assign_locked(). + */ + +static inline unsigned long rht_lock(struct bucket_table *tbl, + struct rhash_lock_head __rcu **bkt) +{ + unsigned long flags; + + local_irq_save(flags); + bit_spin_lock(0, (unsigned long *)bkt); + lock_map_acquire(&tbl->dep_map); + return flags; +} + +static inline unsigned long rht_lock_nested(struct bucket_table *tbl, + struct rhash_lock_head __rcu **bucket, + unsigned int subclass) +{ + unsigned long flags; + + local_irq_save(flags); + bit_spin_lock(0, (unsigned long *)bucket); + lock_acquire_exclusive(&tbl->dep_map, subclass, 0, NULL, _THIS_IP_); + return flags; +} + +static inline void rht_unlock(struct bucket_table *tbl, + struct rhash_lock_head __rcu **bkt, + unsigned long flags) +{ + lock_map_release(&tbl->dep_map); + bit_spin_unlock(0, (unsigned long *)bkt); + local_irq_restore(flags); +} + +enum rht_lookup_freq { + RHT_LOOKUP_NORMAL, + RHT_LOOKUP_LIKELY, +}; + +static __always_inline struct rhash_head *__rht_ptr( + struct rhash_lock_head *p, struct rhash_lock_head __rcu *const *bkt, + const enum rht_lookup_freq freq) +{ + unsigned long p_val = (unsigned long)p & ~BIT(0); + + BUILD_BUG_ON(!__builtin_constant_p(freq)); + + if (freq == RHT_LOOKUP_LIKELY) + return (struct rhash_head *) + (likely(p_val) ? p_val : (unsigned long)RHT_NULLS_MARKER(bkt)); + else + return (struct rhash_head *) + (p_val ?: (unsigned long)RHT_NULLS_MARKER(bkt)); +} + +/* + * Where 'bkt' is a bucket and might be locked: + * rht_ptr_rcu() dereferences that pointer and clears the lock bit. + * rht_ptr() dereferences in a context where the bucket is locked. + * rht_ptr_exclusive() dereferences in a context where exclusive + * access is guaranteed, such as when destroying the table. + */ +static __always_inline struct rhash_head *__rht_ptr_rcu( + struct rhash_lock_head __rcu *const *bkt, + const enum rht_lookup_freq freq) +{ + return __rht_ptr(rcu_dereference_all(*bkt), bkt, freq); +} + +static inline struct rhash_head *rht_ptr_rcu( + struct rhash_lock_head __rcu *const *bkt) +{ + return __rht_ptr_rcu(bkt, RHT_LOOKUP_NORMAL); +} + +static inline struct rhash_head *rht_ptr( + struct rhash_lock_head __rcu *const *bkt, + struct bucket_table *tbl, + unsigned int hash) +{ + return __rht_ptr(rht_dereference_bucket(*bkt, tbl, hash), bkt, + RHT_LOOKUP_NORMAL); +} + +static inline struct rhash_head *rht_ptr_exclusive( + struct rhash_lock_head __rcu *const *bkt) +{ + return __rht_ptr(rcu_dereference_protected(*bkt, 1), bkt, + RHT_LOOKUP_NORMAL); +} + +static inline void rht_assign_locked(struct rhash_lock_head __rcu **bkt, + struct rhash_head *obj) +{ + if (rht_is_a_nulls(obj)) + obj = NULL; + rcu_assign_pointer(*bkt, (void *)((unsigned long)obj | BIT(0))); +} + +static inline void rht_assign_unlock(struct bucket_table *tbl, + struct rhash_lock_head __rcu **bkt, + struct rhash_head *obj, + unsigned long flags) +{ + if (rht_is_a_nulls(obj)) + obj = NULL; + lock_map_release(&tbl->dep_map); + rcu_assign_pointer(*bkt, (void *)obj); + preempt_enable(); + __release(bitlock); + local_irq_restore(flags); +} + /** - * rht_for_each_continue - continue iterating over hash chain + * rht_for_each_from - iterate over hash chain from given head * @pos: the &struct rhash_head to use as a loop cursor. - * @head: the previous &struct rhash_head to continue from + * @head: the &struct rhash_head to start from * @tbl: the &struct bucket_table * @hash: the hash value / bucket index */ -#define rht_for_each_continue(pos, head, tbl, hash) \ - for (pos = rht_dereference_bucket(head, tbl, hash); \ - !rht_is_a_nulls(pos); \ +#define rht_for_each_from(pos, head, tbl, hash) \ + for (pos = head; \ + !rht_is_a_nulls(pos); \ pos = rht_dereference_bucket((pos)->next, tbl, hash)) /** @@ -448,19 +453,20 @@ static inline struct rhash_head __rcu **rht_bucket_insert( * @hash: the hash value / bucket index */ #define rht_for_each(pos, tbl, hash) \ - rht_for_each_continue(pos, *rht_bucket(tbl, hash), tbl, hash) + rht_for_each_from(pos, rht_ptr(rht_bucket(tbl, hash), tbl, hash), \ + tbl, hash) /** - * rht_for_each_entry_continue - continue iterating over hash chain + * rht_for_each_entry_from - iterate over hash chain from given head * @tpos: the type * to use as a loop cursor. * @pos: the &struct rhash_head to use as a loop cursor. - * @head: the previous &struct rhash_head to continue from + * @head: the &struct rhash_head to start from * @tbl: the &struct bucket_table * @hash: the hash value / bucket index * @member: name of the &struct rhash_head within the hashable struct. */ -#define rht_for_each_entry_continue(tpos, pos, head, tbl, hash, member) \ - for (pos = rht_dereference_bucket(head, tbl, hash); \ +#define rht_for_each_entry_from(tpos, pos, head, tbl, hash, member) \ + for (pos = head; \ (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ pos = rht_dereference_bucket((pos)->next, tbl, hash)) @@ -473,8 +479,9 @@ static inline struct rhash_head __rcu **rht_bucket_insert( * @member: name of the &struct rhash_head within the hashable struct. */ #define rht_for_each_entry(tpos, pos, tbl, hash, member) \ - rht_for_each_entry_continue(tpos, pos, *rht_bucket(tbl, hash), \ - tbl, hash, member) + rht_for_each_entry_from(tpos, pos, \ + rht_ptr(rht_bucket(tbl, hash), tbl, hash), \ + tbl, hash, member) /** * rht_for_each_entry_safe - safely iterate over hash chain of given type @@ -489,7 +496,7 @@ static inline struct rhash_head __rcu **rht_bucket_insert( * remove the loop cursor from the list. */ #define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member) \ - for (pos = rht_dereference_bucket(*rht_bucket(tbl, hash), tbl, hash), \ + for (pos = rht_ptr(rht_bucket(tbl, hash), tbl, hash), \ next = !rht_is_a_nulls(pos) ? \ rht_dereference_bucket(pos->next, tbl, hash) : NULL; \ (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ @@ -498,9 +505,9 @@ static inline struct rhash_head __rcu **rht_bucket_insert( rht_dereference_bucket(pos->next, tbl, hash) : NULL) /** - * rht_for_each_rcu_continue - continue iterating over rcu hash chain + * rht_for_each_rcu_from - iterate over rcu hash chain from given head * @pos: the &struct rhash_head to use as a loop cursor. - * @head: the previous &struct rhash_head to continue from + * @head: the &struct rhash_head to start from * @tbl: the &struct bucket_table * @hash: the hash value / bucket index * @@ -508,11 +515,11 @@ static inline struct rhash_head __rcu **rht_bucket_insert( * the _rcu mutation primitives such as rhashtable_insert() as long as the * traversal is guarded by rcu_read_lock(). */ -#define rht_for_each_rcu_continue(pos, head, tbl, hash) \ +#define rht_for_each_rcu_from(pos, head, tbl, hash) \ for (({barrier(); }), \ - pos = rht_dereference_bucket_rcu(head, tbl, hash); \ + pos = head; \ !rht_is_a_nulls(pos); \ - pos = rcu_dereference_raw(pos->next)) + pos = rcu_dereference_all(pos->next)) /** * rht_for_each_rcu - iterate over rcu hash chain @@ -524,14 +531,17 @@ static inline struct rhash_head __rcu **rht_bucket_insert( * the _rcu mutation primitives such as rhashtable_insert() as long as the * traversal is guarded by rcu_read_lock(). */ -#define rht_for_each_rcu(pos, tbl, hash) \ - rht_for_each_rcu_continue(pos, *rht_bucket(tbl, hash), tbl, hash) +#define rht_for_each_rcu(pos, tbl, hash) \ + for (({barrier(); }), \ + pos = rht_ptr_rcu(rht_bucket(tbl, hash)); \ + !rht_is_a_nulls(pos); \ + pos = rcu_dereference_all(pos->next)) /** - * rht_for_each_entry_rcu_continue - continue iterating over rcu hash chain + * rht_for_each_entry_rcu_from - iterated over rcu hash chain from given head * @tpos: the type * to use as a loop cursor. * @pos: the &struct rhash_head to use as a loop cursor. - * @head: the previous &struct rhash_head to continue from + * @head: the &struct rhash_head to start from * @tbl: the &struct bucket_table * @hash: the hash value / bucket index * @member: name of the &struct rhash_head within the hashable struct. @@ -540,9 +550,9 @@ static inline struct rhash_head __rcu **rht_bucket_insert( * the _rcu mutation primitives such as rhashtable_insert() as long as the * traversal is guarded by rcu_read_lock(). */ -#define rht_for_each_entry_rcu_continue(tpos, pos, head, tbl, hash, member) \ +#define rht_for_each_entry_rcu_from(tpos, pos, head, tbl, hash, member) \ for (({barrier(); }), \ - pos = rht_dereference_bucket_rcu(head, tbl, hash); \ + pos = head; \ (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ pos = rht_dereference_bucket_rcu(pos->next, tbl, hash)) @@ -559,8 +569,9 @@ static inline struct rhash_head __rcu **rht_bucket_insert( * traversal is guarded by rcu_read_lock(). */ #define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member) \ - rht_for_each_entry_rcu_continue(tpos, pos, *rht_bucket(tbl, hash), \ - tbl, hash, member) + rht_for_each_entry_rcu_from(tpos, pos, \ + rht_ptr_rcu(rht_bucket(tbl, hash)), \ + tbl, hash, member) /** * rhl_for_each_rcu - iterate over rcu hash table list @@ -571,7 +582,7 @@ static inline struct rhash_head __rcu **rht_bucket_insert( * list returned by rhltable_lookup. */ #define rhl_for_each_rcu(pos, list) \ - for (pos = list; pos; pos = rcu_dereference_raw(pos->next)) + for (pos = list; pos; pos = rcu_dereference_all(pos->next)) /** * rhl_for_each_entry_rcu - iterate over rcu hash table list of given type @@ -585,7 +596,7 @@ static inline struct rhash_head __rcu **rht_bucket_insert( */ #define rhl_for_each_entry_rcu(tpos, pos, list, member) \ for (pos = list; pos && rht_entry(tpos, pos, member); \ - pos = rcu_dereference_raw(pos->next)) + pos = rcu_dereference_all(pos->next)) static inline int rhashtable_compare(struct rhashtable_compare_arg *arg, const void *obj) @@ -597,28 +608,37 @@ static inline int rhashtable_compare(struct rhashtable_compare_arg *arg, } /* Internal function, do not use. */ -static inline struct rhash_head *__rhashtable_lookup( +static __always_inline struct rhash_head *__rhashtable_lookup( struct rhashtable *ht, const void *key, - const struct rhashtable_params params) + const struct rhashtable_params params, + const enum rht_lookup_freq freq) { struct rhashtable_compare_arg arg = { .ht = ht, .key = key, }; + struct rhash_lock_head __rcu *const *bkt; struct bucket_table *tbl; struct rhash_head *he; unsigned int hash; + BUILD_BUG_ON(!__builtin_constant_p(freq)); tbl = rht_dereference_rcu(ht->tbl, ht); restart: hash = rht_key_hashfn(ht, tbl, key, params); - rht_for_each_rcu(he, tbl, hash) { - if (params.obj_cmpfn ? - params.obj_cmpfn(&arg, rht_obj(ht, he)) : - rhashtable_compare(&arg, rht_obj(ht, he))) - continue; - return he; - } + bkt = rht_bucket(tbl, hash); + do { + rht_for_each_rcu_from(he, __rht_ptr_rcu(bkt, freq), tbl, hash) { + if (params.obj_cmpfn ? + params.obj_cmpfn(&arg, rht_obj(ht, he)) : + rhashtable_compare(&arg, rht_obj(ht, he))) + continue; + return he; + } + /* An object might have been moved to a different hash chain, + * while we walk along it - better check and retry. + */ + } while (he != RHT_NULLS_MARKER(bkt)); /* Ensure we see any new tables. */ smp_rmb(); @@ -637,21 +657,32 @@ restart: * @params: hash table parameters * * Computes the hash value for the key and traverses the bucket chain looking - * for a entry with an identical key. The first matching entry is returned. + * for an entry with an identical key. The first matching entry is returned. * * This must only be called under the RCU read lock. * * Returns the first entry on which the compare function returned true. */ -static inline void *rhashtable_lookup( +static __always_inline void *rhashtable_lookup( struct rhashtable *ht, const void *key, const struct rhashtable_params params) { - struct rhash_head *he = __rhashtable_lookup(ht, key, params); + struct rhash_head *he = __rhashtable_lookup(ht, key, params, + RHT_LOOKUP_NORMAL); return he ? rht_obj(ht, he) : NULL; } +static __always_inline void *rhashtable_lookup_likely( + struct rhashtable *ht, const void *key, + const struct rhashtable_params params) +{ + struct rhash_head *he = __rhashtable_lookup(ht, key, params, + RHT_LOOKUP_LIKELY); + + return likely(he) ? rht_obj(ht, he) : NULL; +} + /** * rhashtable_lookup_fast - search hash table, without RCU read lock * @ht: hash table @@ -659,14 +690,14 @@ static inline void *rhashtable_lookup( * @params: hash table parameters * * Computes the hash value for the key and traverses the bucket chain looking - * for a entry with an identical key. The first matching entry is returned. + * for an entry with an identical key. The first matching entry is returned. * * Only use this function when you have other mechanisms guaranteeing * that the object won't go away after the RCU read lock is released. * * Returns the first entry on which the compare function returned true. */ -static inline void *rhashtable_lookup_fast( +static __always_inline void *rhashtable_lookup_fast( struct rhashtable *ht, const void *key, const struct rhashtable_params params) { @@ -686,27 +717,38 @@ static inline void *rhashtable_lookup_fast( * @params: hash table parameters * * Computes the hash value for the key and traverses the bucket chain looking - * for a entry with an identical key. All matching entries are returned + * for an entry with an identical key. All matching entries are returned * in a list. * * This must only be called under the RCU read lock. * * Returns the list of entries that match the given key. */ -static inline struct rhlist_head *rhltable_lookup( +static __always_inline struct rhlist_head *rhltable_lookup( struct rhltable *hlt, const void *key, const struct rhashtable_params params) { - struct rhash_head *he = __rhashtable_lookup(&hlt->ht, key, params); + struct rhash_head *he = __rhashtable_lookup(&hlt->ht, key, params, + RHT_LOOKUP_NORMAL); return he ? container_of(he, struct rhlist_head, rhead) : NULL; } +static __always_inline struct rhlist_head *rhltable_lookup_likely( + struct rhltable *hlt, const void *key, + const struct rhashtable_params params) +{ + struct rhash_head *he = __rhashtable_lookup(&hlt->ht, key, params, + RHT_LOOKUP_LIKELY); + + return likely(he) ? container_of(he, struct rhlist_head, rhead) : NULL; +} + /* Internal function, please use rhashtable_insert_fast() instead. This - * function returns the existing element already in hashes in there is a clash, + * function returns the existing element already in hashes if there is a clash, * otherwise it returns an error via ERR_PTR(). */ -static inline void *__rhashtable_insert_fast( +static __always_inline void *__rhashtable_insert_fast( struct rhashtable *ht, const void *key, struct rhash_head *obj, const struct rhashtable_params params, bool rhlist) { @@ -714,10 +756,11 @@ static inline void *__rhashtable_insert_fast( .ht = ht, .key = key, }; + struct rhash_lock_head __rcu **bkt; struct rhash_head __rcu **pprev; struct bucket_table *tbl; struct rhash_head *head; - spinlock_t *lock; + unsigned long flags; unsigned int hash; int elasticity; void *data; @@ -726,23 +769,22 @@ static inline void *__rhashtable_insert_fast( tbl = rht_dereference_rcu(ht->tbl, ht); hash = rht_head_hashfn(ht, tbl, obj, params); - lock = rht_bucket_lock(tbl, hash); - spin_lock_bh(lock); + elasticity = RHT_ELASTICITY; + bkt = rht_bucket_insert(ht, tbl, hash); + data = ERR_PTR(-ENOMEM); + if (!bkt) + goto out; + pprev = NULL; + flags = rht_lock(tbl, bkt); - if (unlikely(rht_dereference_bucket(tbl->future_tbl, tbl, hash))) { + if (unlikely(rcu_access_pointer(tbl->future_tbl))) { slow_path: - spin_unlock_bh(lock); + rht_unlock(tbl, bkt, flags); rcu_read_unlock(); return rhashtable_insert_slow(ht, key, obj); } - elasticity = RHT_ELASTICITY; - pprev = rht_bucket_insert(ht, tbl, hash); - data = ERR_PTR(-ENOMEM); - if (!pprev) - goto out; - - rht_for_each_continue(head, *pprev, tbl, hash) { + rht_for_each_from(head, rht_ptr(bkt, tbl, hash), tbl, hash) { struct rhlist_head *plist; struct rhlist_head *list; @@ -750,13 +792,15 @@ slow_path: if (!key || (params.obj_cmpfn ? params.obj_cmpfn(&arg, rht_obj(ht, head)) : - rhashtable_compare(&arg, rht_obj(ht, head)))) + rhashtable_compare(&arg, rht_obj(ht, head)))) { + pprev = &head->next; continue; + } data = rht_obj(ht, head); if (!rhlist) - goto out; + goto out_unlock; list = container_of(obj, struct rhlist_head, rhead); @@ -765,9 +809,13 @@ slow_path: RCU_INIT_POINTER(list->next, plist); head = rht_dereference_bucket(head->next, tbl, hash); RCU_INIT_POINTER(list->rhead.next, head); - rcu_assign_pointer(*pprev, obj); - - goto good; + if (pprev) { + rcu_assign_pointer(*pprev, obj); + rht_unlock(tbl, bkt, flags); + } else + rht_assign_unlock(tbl, bkt, obj, flags); + data = NULL; + goto out; } if (elasticity <= 0) @@ -775,12 +823,13 @@ slow_path: data = ERR_PTR(-E2BIG); if (unlikely(rht_grow_above_max(ht, tbl))) - goto out; + goto out_unlock; if (unlikely(rht_grow_above_100(ht, tbl))) goto slow_path; - head = rht_dereference_bucket(*pprev, tbl, hash); + /* Inserting at head of list makes unlocking free. */ + head = rht_ptr(bkt, tbl, hash); RCU_INIT_POINTER(obj->next, head); if (rhlist) { @@ -790,20 +839,21 @@ slow_path: RCU_INIT_POINTER(list->next, NULL); } - rcu_assign_pointer(*pprev, obj); - atomic_inc(&ht->nelems); + rht_assign_unlock(tbl, bkt, obj, flags); + if (rht_grow_above_75(ht, tbl)) schedule_work(&ht->run_work); -good: data = NULL; - out: - spin_unlock_bh(lock); rcu_read_unlock(); return data; + +out_unlock: + rht_unlock(tbl, bkt, flags); + goto out; } /** @@ -812,17 +862,16 @@ out: * @obj: pointer to hash head inside object * @params: hash table parameters * - * Will take a per bucket spinlock to protect against mutual mutations + * Will take the per bucket bitlock to protect against mutual mutations * on the same bucket. Multiple insertions may occur in parallel unless - * they map to the same bucket lock. + * they map to the same bucket. * * It is safe to call this function from atomic context. * - * Will trigger an automatic deferred table resizing if the size grows - * beyond the watermark indicated by grow_decision() which can be passed - * to rhashtable_init(). + * Will trigger an automatic deferred table resizing if residency in the + * table grows beyond 70%. */ -static inline int rhashtable_insert_fast( +static __always_inline int rhashtable_insert_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params) { @@ -842,17 +891,16 @@ static inline int rhashtable_insert_fast( * @list: pointer to hash list head inside object * @params: hash table parameters * - * Will take a per bucket spinlock to protect against mutual mutations + * Will take the per bucket bitlock to protect against mutual mutations * on the same bucket. Multiple insertions may occur in parallel unless - * they map to the same bucket lock. + * they map to the same bucket. * * It is safe to call this function from atomic context. * - * Will trigger an automatic deferred table resizing if the size grows - * beyond the watermark indicated by grow_decision() which can be passed - * to rhashtable_init(). + * Will trigger an automatic deferred table resizing if residency in the + * table grows beyond 70%. */ -static inline int rhltable_insert_key( +static __always_inline int rhltable_insert_key( struct rhltable *hlt, const void *key, struct rhlist_head *list, const struct rhashtable_params params) { @@ -866,17 +914,16 @@ static inline int rhltable_insert_key( * @list: pointer to hash list head inside object * @params: hash table parameters * - * Will take a per bucket spinlock to protect against mutual mutations + * Will take the per bucket bitlock to protect against mutual mutations * on the same bucket. Multiple insertions may occur in parallel unless - * they map to the same bucket lock. + * they map to the same bucket. * * It is safe to call this function from atomic context. * - * Will trigger an automatic deferred table resizing if the size grows - * beyond the watermark indicated by grow_decision() which can be passed - * to rhashtable_init(). + * Will trigger an automatic deferred table resizing if residency in the + * table grows beyond 70%. */ -static inline int rhltable_insert( +static __always_inline int rhltable_insert( struct rhltable *hlt, struct rhlist_head *list, const struct rhashtable_params params) { @@ -893,22 +940,15 @@ static inline int rhltable_insert( * @obj: pointer to hash head inside object * @params: hash table parameters * - * Locks down the bucket chain in both the old and new table if a resize - * is in progress to ensure that writers can't remove from the old table - * and can't insert to the new table during the atomic operation of search - * and insertion. Searches for duplicates in both the old and new table if - * a resize is in progress. - * * This lookup function may only be used for fixed key hash table (key_len * parameter set). It will BUG() if used inappropriately. * * It is safe to call this function from atomic context. * - * Will trigger an automatic deferred table resizing if the size grows - * beyond the watermark indicated by grow_decision() which can be passed - * to rhashtable_init(). + * Will trigger an automatic deferred table resizing if residency in the + * table grows beyond 70%. */ -static inline int rhashtable_lookup_insert_fast( +static __always_inline int rhashtable_lookup_insert_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params) { @@ -935,7 +975,7 @@ static inline int rhashtable_lookup_insert_fast( * object if it exists, NULL if it did not and the insertion was successful, * and an ERR_PTR otherwise. */ -static inline void *rhashtable_lookup_get_insert_fast( +static __always_inline void *rhashtable_lookup_get_insert_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params) { @@ -955,21 +995,14 @@ static inline void *rhashtable_lookup_get_insert_fast( * @obj: pointer to hash head inside object * @params: hash table parameters * - * Locks down the bucket chain in both the old and new table if a resize - * is in progress to ensure that writers can't remove from the old table - * and can't insert to the new table during the atomic operation of search - * and insertion. Searches for duplicates in both the old and new table if - * a resize is in progress. - * * Lookups may occur in parallel with hashtable mutations and resizing. * - * Will trigger an automatic deferred table resizing if the size grows - * beyond the watermark indicated by grow_decision() which can be passed - * to rhashtable_init(). + * Will trigger an automatic deferred table resizing if residency in the + * table grows beyond 70%. * * Returns zero on success. */ -static inline int rhashtable_lookup_insert_key( +static __always_inline int rhashtable_lookup_insert_key( struct rhashtable *ht, const void *key, struct rhash_head *obj, const struct rhashtable_params params) { @@ -987,15 +1020,15 @@ static inline int rhashtable_lookup_insert_key( /** * rhashtable_lookup_get_insert_key - lookup and insert object into hash table * @ht: hash table + * @key: key * @obj: pointer to hash head inside object * @params: hash table parameters - * @data: pointer to element data already in hashes * * Just like rhashtable_lookup_insert_key(), but this function returns the * object if it exists, NULL if it does not and the insertion was successful, * and an ERR_PTR otherwise. */ -static inline void *rhashtable_lookup_get_insert_key( +static __always_inline void *rhashtable_lookup_get_insert_key( struct rhashtable *ht, const void *key, struct rhash_head *obj, const struct rhashtable_params params) { @@ -1005,24 +1038,26 @@ static inline void *rhashtable_lookup_get_insert_key( } /* Internal function, please use rhashtable_remove_fast() instead */ -static inline int __rhashtable_remove_fast_one( +static __always_inline int __rhashtable_remove_fast_one( struct rhashtable *ht, struct bucket_table *tbl, struct rhash_head *obj, const struct rhashtable_params params, bool rhlist) { + struct rhash_lock_head __rcu **bkt; struct rhash_head __rcu **pprev; struct rhash_head *he; - spinlock_t * lock; + unsigned long flags; unsigned int hash; int err = -ENOENT; hash = rht_head_hashfn(ht, tbl, obj, params); - lock = rht_bucket_lock(tbl, hash); - - spin_lock_bh(lock); + bkt = rht_bucket_var(tbl, hash); + if (!bkt) + return -ENOENT; + pprev = NULL; + flags = rht_lock(tbl, bkt); - pprev = rht_bucket_var(tbl, hash); - rht_for_each_continue(he, *pprev, tbl, hash) { + rht_for_each_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) { struct rhlist_head *list; list = container_of(he, struct rhlist_head, rhead); @@ -1062,12 +1097,17 @@ static inline int __rhashtable_remove_fast_one( } } - rcu_assign_pointer(*pprev, obj); - break; + if (pprev) { + rcu_assign_pointer(*pprev, obj); + rht_unlock(tbl, bkt, flags); + } else { + rht_assign_unlock(tbl, bkt, obj, flags); + } + goto unlocked; } - spin_unlock_bh(lock); - + rht_unlock(tbl, bkt, flags); +unlocked: if (err > 0) { atomic_dec(&ht->nelems); if (unlikely(ht->p.automatic_shrinking && @@ -1080,7 +1120,7 @@ static inline int __rhashtable_remove_fast_one( } /* Internal function, please use rhashtable_remove_fast() instead */ -static inline int __rhashtable_remove_fast( +static __always_inline int __rhashtable_remove_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params, bool rhlist) { @@ -1116,12 +1156,12 @@ static inline int __rhashtable_remove_fast( * walk the bucket chain upon removal. The removal operation is thus * considerable slow if the hash table is not correctly sized. * - * Will automatically shrink the table via rhashtable_expand() if the - * shrink_decision function specified at rhashtable_init() returns true. + * Will automatically shrink the table if permitted when residency drops + * below 30%. * * Returns zero on success, -ENOENT if the entry could not be found. */ -static inline int rhashtable_remove_fast( +static __always_inline int rhashtable_remove_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params) { @@ -1136,14 +1176,14 @@ static inline int rhashtable_remove_fast( * * Since the hash chain is single linked, the removal operation needs to * walk the bucket chain upon removal. The removal operation is thus - * considerable slow if the hash table is not correctly sized. + * considerably slower if the hash table is not correctly sized. * - * Will automatically shrink the table via rhashtable_expand() if the - * shrink_decision function specified at rhashtable_init() returns true. + * Will automatically shrink the table if permitted when residency drops + * below 30% * * Returns zero on success, -ENOENT if the entry could not be found. */ -static inline int rhltable_remove( +static __always_inline int rhltable_remove( struct rhltable *hlt, struct rhlist_head *list, const struct rhashtable_params params) { @@ -1151,14 +1191,15 @@ static inline int rhltable_remove( } /* Internal function, please use rhashtable_replace_fast() instead */ -static inline int __rhashtable_replace_fast( +static __always_inline int __rhashtable_replace_fast( struct rhashtable *ht, struct bucket_table *tbl, struct rhash_head *obj_old, struct rhash_head *obj_new, const struct rhashtable_params params) { + struct rhash_lock_head __rcu **bkt; struct rhash_head __rcu **pprev; struct rhash_head *he; - spinlock_t *lock; + unsigned long flags; unsigned int hash; int err = -ENOENT; @@ -1169,25 +1210,33 @@ static inline int __rhashtable_replace_fast( if (hash != rht_head_hashfn(ht, tbl, obj_new, params)) return -EINVAL; - lock = rht_bucket_lock(tbl, hash); + bkt = rht_bucket_var(tbl, hash); + if (!bkt) + return -ENOENT; - spin_lock_bh(lock); + pprev = NULL; + flags = rht_lock(tbl, bkt); - pprev = rht_bucket_var(tbl, hash); - rht_for_each_continue(he, *pprev, tbl, hash) { + rht_for_each_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) { if (he != obj_old) { pprev = &he->next; continue; } rcu_assign_pointer(obj_new->next, obj_old->next); - rcu_assign_pointer(*pprev, obj_new); + if (pprev) { + rcu_assign_pointer(*pprev, obj_new); + rht_unlock(tbl, bkt, flags); + } else { + rht_assign_unlock(tbl, bkt, obj_new, flags); + } err = 0; - break; + goto unlocked; } - spin_unlock_bh(lock); + rht_unlock(tbl, bkt, flags); +unlocked: return err; } @@ -1205,7 +1254,7 @@ static inline int __rhashtable_replace_fast( * Returns zero on success, -ENOENT if the entry could not be found, * -EINVAL if hash is not the same for the old and new objects. */ -static inline int rhashtable_replace_fast( +static __always_inline int rhashtable_replace_fast( struct rhashtable *ht, struct rhash_head *obj_old, struct rhash_head *obj_new, const struct rhashtable_params params) @@ -1232,14 +1281,6 @@ static inline int rhashtable_replace_fast( return err; } -/* Obsolete function, do not use in new code. */ -static inline int rhashtable_walk_init(struct rhashtable *ht, - struct rhashtable_iter *iter, gfp_t gfp) -{ - rhashtable_walk_enter(ht, iter); - return 0; -} - /** * rhltable_walk_enter - Initialise an iterator * @hlt: Table to walk over @@ -1255,15 +1296,16 @@ static inline int rhashtable_walk_init(struct rhashtable *ht, * For a completely stable walk you should construct your own data * structure outside the hash table. * - * This function may sleep so you must not call it from interrupt - * context or with spin locks held. + * This function may be called from any process context, including + * non-preemptable context, but cannot be called from softirq or + * hardirq context. * * You must call rhashtable_walk_exit after this function returns. */ static inline void rhltable_walk_enter(struct rhltable *hlt, struct rhashtable_iter *iter) { - return rhashtable_walk_enter(&hlt->ht, iter); + rhashtable_walk_enter(&hlt->ht, iter); } /** @@ -1279,12 +1321,12 @@ static inline void rhltable_free_and_destroy(struct rhltable *hlt, void *arg), void *arg) { - return rhashtable_free_and_destroy(&hlt->ht, free_fn, arg); + rhashtable_free_and_destroy(&hlt->ht, free_fn, arg); } static inline void rhltable_destroy(struct rhltable *hlt) { - return rhltable_free_and_destroy(hlt, NULL, NULL); + rhltable_free_and_destroy(hlt, NULL, NULL); } #endif /* _LINUX_RHASHTABLE_H */ |
