diff options
Diffstat (limited to 'lib/sbitmap.c')
| -rw-r--r-- | lib/sbitmap.c | 374 |
1 files changed, 179 insertions, 195 deletions
diff --git a/lib/sbitmap.c b/lib/sbitmap.c index 6220fa67fb7e..4d188d05db15 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -21,7 +21,7 @@ static int init_alloc_hint(struct sbitmap *sb, gfp_t flags) int i; for_each_possible_cpu(i) - *per_cpu_ptr(sb->alloc_hint, i) = prandom_u32() % depth; + *per_cpu_ptr(sb->alloc_hint, i) = get_random_u32_below(depth); } return 0; } @@ -33,7 +33,7 @@ static inline unsigned update_alloc_hint_before_get(struct sbitmap *sb, hint = this_cpu_read(*sb->alloc_hint); if (unlikely(hint >= depth)) { - hint = depth ? prandom_u32() % depth : 0; + hint = depth ? get_random_u32_below(depth) : 0; this_cpu_write(*sb->alloc_hint, hint); } @@ -60,12 +60,30 @@ static inline void update_alloc_hint_after_get(struct sbitmap *sb, /* * See if we have deferred clears that we can batch move */ -static inline bool sbitmap_deferred_clear(struct sbitmap_word *map) +static inline bool sbitmap_deferred_clear(struct sbitmap_word *map, + unsigned int depth, unsigned int alloc_hint, bool wrap) { - unsigned long mask; + unsigned long mask, word_mask; - if (!READ_ONCE(map->cleared)) - return false; + guard(raw_spinlock_irqsave)(&map->swap_lock); + + if (!map->cleared) { + if (depth == 0) + return false; + + word_mask = (~0UL) >> (BITS_PER_LONG - depth); + /* + * The current behavior is to always retry after moving + * ->cleared to word, and we change it to retry in case + * of any free bits. To avoid an infinite loop, we need + * to take wrap & alloc_hint into account, otherwise a + * soft lockup may occur. + */ + if (!wrap && alloc_hint) + word_mask &= ~((1UL << alloc_hint) - 1); + + return (READ_ONCE(map->word) & word_mask) != word_mask; + } /* * First get a stable cleared mask, setting the old mask to 0. @@ -85,7 +103,7 @@ int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, bool alloc_hint) { unsigned int bits_per_word; - unsigned int i; + int i; if (shift < 0) shift = sbitmap_calculate_shift(depth); @@ -111,16 +129,15 @@ int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, sb->alloc_hint = NULL; } - sb->map = kcalloc_node(sb->map_nr, sizeof(*sb->map), flags, node); + sb->map = kvzalloc_node(sb->map_nr * sizeof(*sb->map), flags, node); if (!sb->map) { free_percpu(sb->alloc_hint); return -ENOMEM; } - for (i = 0; i < sb->map_nr; i++) { - sb->map[i].depth = min(depth, bits_per_word); - depth -= sb->map[i].depth; - } + for (i = 0; i < sb->map_nr; i++) + raw_spin_lock_init(&sb->map[i].swap_lock); + return 0; } EXPORT_SYMBOL_GPL(sbitmap_init_node); @@ -131,15 +148,10 @@ void sbitmap_resize(struct sbitmap *sb, unsigned int depth) unsigned int i; for (i = 0; i < sb->map_nr; i++) - sbitmap_deferred_clear(&sb->map[i]); + sbitmap_deferred_clear(&sb->map[i], 0, 0, 0); sb->depth = depth; sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word); - - for (i = 0; i < sb->map_nr; i++) { - sb->map[i].depth = min(depth, bits_per_word); - depth -= sb->map[i].depth; - } } EXPORT_SYMBOL_GPL(sbitmap_resize); @@ -177,43 +189,61 @@ static int __sbitmap_get_word(unsigned long *word, unsigned long depth, return nr; } -static int sbitmap_find_bit_in_index(struct sbitmap *sb, int index, - unsigned int alloc_hint) +static int sbitmap_find_bit_in_word(struct sbitmap_word *map, + unsigned int depth, + unsigned int alloc_hint, + bool wrap) { - struct sbitmap_word *map = &sb->map[index]; int nr; do { - nr = __sbitmap_get_word(&map->word, map->depth, alloc_hint, - !sb->round_robin); + nr = __sbitmap_get_word(&map->word, depth, + alloc_hint, wrap); if (nr != -1) break; - if (!sbitmap_deferred_clear(map)) + if (!sbitmap_deferred_clear(map, depth, alloc_hint, wrap)) break; } while (1); return nr; } -static int __sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint) +static unsigned int __map_depth_with_shallow(const struct sbitmap *sb, + int index, + unsigned int shallow_depth) { - unsigned int i, index; - int nr = -1; + u64 shallow_word_depth; + unsigned int word_depth, reminder; - index = SB_NR_TO_INDEX(sb, alloc_hint); + word_depth = __map_depth(sb, index); + if (shallow_depth >= sb->depth) + return word_depth; - /* - * Unless we're doing round robin tag allocation, just use the - * alloc_hint to find the right word index. No point in looping - * twice in find_next_zero_bit() for that case. - */ - if (sb->round_robin) - alloc_hint = SB_NR_TO_BIT(sb, alloc_hint); - else - alloc_hint = 0; + shallow_word_depth = word_depth * shallow_depth; + reminder = do_div(shallow_word_depth, sb->depth); + + if (reminder >= (index + 1) * word_depth) + shallow_word_depth++; + + return (unsigned int)shallow_word_depth; +} + +static int sbitmap_find_bit(struct sbitmap *sb, + unsigned int shallow_depth, + unsigned int index, + unsigned int alloc_hint, + bool wrap) +{ + unsigned int i; + int nr = -1; for (i = 0; i < sb->map_nr; i++) { - nr = sbitmap_find_bit_in_index(sb, index, alloc_hint); + unsigned int depth = __map_depth_with_shallow(sb, index, + shallow_depth); + + if (depth) + nr = sbitmap_find_bit_in_word(&sb->map[index], depth, + alloc_hint, wrap); if (nr != -1) { nr += index << sb->shift; break; @@ -228,6 +258,26 @@ static int __sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint) return nr; } +static int __sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint) +{ + unsigned int index; + + index = SB_NR_TO_INDEX(sb, alloc_hint); + + /* + * Unless we're doing round robin tag allocation, just use the + * alloc_hint to find the right word index. No point in looping + * twice in find_next_zero_bit() for that case. + */ + if (sb->round_robin) + alloc_hint = SB_NR_TO_BIT(sb, alloc_hint); + else + alloc_hint = 0; + + return sbitmap_find_bit(sb, UINT_MAX, index, alloc_hint, + !sb->round_robin); +} + int sbitmap_get(struct sbitmap *sb) { int nr; @@ -249,38 +299,30 @@ static int __sbitmap_get_shallow(struct sbitmap *sb, unsigned int alloc_hint, unsigned long shallow_depth) { - unsigned int i, index; - int nr = -1; + unsigned int index; index = SB_NR_TO_INDEX(sb, alloc_hint); - - for (i = 0; i < sb->map_nr; i++) { -again: - nr = __sbitmap_get_word(&sb->map[index].word, - min(sb->map[index].depth, shallow_depth), - SB_NR_TO_BIT(sb, alloc_hint), true); - if (nr != -1) { - nr += index << sb->shift; - break; - } - - if (sbitmap_deferred_clear(&sb->map[index])) - goto again; - - /* Jump to next index. */ - index++; - alloc_hint = index << sb->shift; - - if (index >= sb->map_nr) { - index = 0; - alloc_hint = 0; - } - } - - return nr; -} - -int sbitmap_get_shallow(struct sbitmap *sb, unsigned long shallow_depth) + alloc_hint = SB_NR_TO_BIT(sb, alloc_hint); + + return sbitmap_find_bit(sb, shallow_depth, index, alloc_hint, true); +} + +/** + * sbitmap_get_shallow() - Try to allocate a free bit from a &struct sbitmap, + * limiting the depth used from each word. + * @sb: Bitmap to allocate from. + * @shallow_depth: The maximum number of bits to allocate from the bitmap. + * + * This rather specific operation allows for having multiple users with + * different allocation limits. E.g., there can be a high-priority class that + * uses sbitmap_get() and a low-priority class that uses sbitmap_get_shallow() + * with a @shallow_depth of (sb->depth >> 1). Then, the low-priority + * class can only allocate half of the total bits in the bitmap, preventing it + * from starving out the high-priority class. + * + * Return: Non-negative allocated bit number if successful, -1 otherwise. + */ +static int sbitmap_get_shallow(struct sbitmap *sb, unsigned long shallow_depth) { int nr; unsigned int hint, depth; @@ -295,7 +337,6 @@ int sbitmap_get_shallow(struct sbitmap *sb, unsigned long shallow_depth) return nr; } -EXPORT_SYMBOL_GPL(sbitmap_get_shallow); bool sbitmap_any_bit_set(const struct sbitmap *sb) { @@ -315,11 +356,12 @@ static unsigned int __sbitmap_weight(const struct sbitmap *sb, bool set) for (i = 0; i < sb->map_nr; i++) { const struct sbitmap_word *word = &sb->map[i]; + unsigned int word_depth = __map_depth(sb, i); if (set) - weight += bitmap_weight(&word->word, word->depth); + weight += bitmap_weight(&word->word, word_depth); else - weight += bitmap_weight(&word->cleared, word->depth); + weight += bitmap_weight(&word->cleared, word_depth); } return weight; } @@ -367,7 +409,7 @@ void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m) for (i = 0; i < sb->map_nr; i++) { unsigned long word = READ_ONCE(sb->map[i].word); unsigned long cleared = READ_ONCE(sb->map[i].cleared); - unsigned int word_bits = READ_ONCE(sb->map[i].depth); + unsigned int word_bits = __map_depth(sb, i); word &= ~cleared; @@ -398,32 +440,9 @@ EXPORT_SYMBOL_GPL(sbitmap_bitmap_show); static unsigned int sbq_calc_wake_batch(struct sbitmap_queue *sbq, unsigned int depth) { - unsigned int wake_batch; - unsigned int shallow_depth; - - /* - * For each batch, we wake up one queue. We need to make sure that our - * batch size is small enough that the full depth of the bitmap, - * potentially limited by a shallow depth, is enough to wake up all of - * the queues. - * - * Each full word of the bitmap has bits_per_word bits, and there might - * be a partial word. There are depth / bits_per_word full words and - * depth % bits_per_word bits left over. In bitwise arithmetic: - * - * bits_per_word = 1 << shift - * depth / bits_per_word = depth >> shift - * depth % bits_per_word = depth & ((1 << shift) - 1) - * - * Each word can be limited to sbq->min_shallow_depth bits. - */ - shallow_depth = min(1U << sbq->sb.shift, sbq->min_shallow_depth); - depth = ((depth >> sbq->sb.shift) * shallow_depth + - min(depth & ((1U << sbq->sb.shift) - 1), shallow_depth)); - wake_batch = clamp_t(unsigned int, depth / SBQ_WAIT_QUEUES, 1, - SBQ_WAKE_BATCH); - - return wake_batch; + return clamp_t(unsigned int, + min(depth, sbq->min_shallow_depth) / SBQ_WAIT_QUEUES, + 1, SBQ_WAKE_BATCH); } int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth, @@ -441,6 +460,8 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth, sbq->wake_batch = sbq_calc_wake_batch(sbq, depth); atomic_set(&sbq->wake_index, 0); atomic_set(&sbq->ws_active, 0); + atomic_set(&sbq->completion_cnt, 0); + atomic_set(&sbq->wakeup_cnt, 0); sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node); if (!sbq->ws) { @@ -448,50 +469,33 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth, return -ENOMEM; } - for (i = 0; i < SBQ_WAIT_QUEUES; i++) { + for (i = 0; i < SBQ_WAIT_QUEUES; i++) init_waitqueue_head(&sbq->ws[i].wait); - atomic_set(&sbq->ws[i].wait_cnt, sbq->wake_batch); - } return 0; } EXPORT_SYMBOL_GPL(sbitmap_queue_init_node); -static inline void __sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq, - unsigned int wake_batch) -{ - int i; - - if (sbq->wake_batch != wake_batch) { - WRITE_ONCE(sbq->wake_batch, wake_batch); - /* - * Pairs with the memory barrier in sbitmap_queue_wake_up() - * to ensure that the batch size is updated before the wait - * counts. - */ - smp_mb(); - for (i = 0; i < SBQ_WAIT_QUEUES; i++) - atomic_set(&sbq->ws[i].wait_cnt, 1); - } -} - static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq, unsigned int depth) { unsigned int wake_batch; wake_batch = sbq_calc_wake_batch(sbq, depth); - __sbitmap_queue_update_wake_batch(sbq, wake_batch); + if (sbq->wake_batch != wake_batch) + WRITE_ONCE(sbq->wake_batch, wake_batch); } void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq, unsigned int users) { unsigned int wake_batch; + unsigned int depth = (sbq->sb.depth + users - 1) / users; - wake_batch = clamp_val((sbq->sb.depth + users - 1) / - users, 4, SBQ_WAKE_BATCH); - __sbitmap_queue_update_wake_batch(sbq, wake_batch); + wake_batch = clamp_val(depth / SBQ_WAIT_QUEUES, + 1, SBQ_WAKE_BATCH); + + WRITE_ONCE(sbq->wake_batch, wake_batch); } EXPORT_SYMBOL_GPL(sbitmap_queue_recalculate_wake_batch); @@ -527,30 +531,31 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags, for (i = 0; i < sb->map_nr; i++) { struct sbitmap_word *map = &sb->map[index]; unsigned long get_mask; + unsigned int map_depth = __map_depth(sb, index); + unsigned long val; - sbitmap_deferred_clear(map); - if (map->word == (1UL << (map->depth - 1)) - 1) - continue; + sbitmap_deferred_clear(map, 0, 0, 0); + val = READ_ONCE(map->word); + if (val == (1UL << (map_depth - 1)) - 1) + goto next; - nr = find_first_zero_bit(&map->word, map->depth); - if (nr + nr_tags <= map->depth) { + nr = find_first_zero_bit(&val, map_depth); + if (nr + nr_tags <= map_depth) { atomic_long_t *ptr = (atomic_long_t *) &map->word; - int map_tags = min_t(int, nr_tags, map->depth); - unsigned long val, ret; - - get_mask = ((1UL << map_tags) - 1) << nr; - do { - val = READ_ONCE(map->word); - ret = atomic_long_cmpxchg(ptr, val, get_mask | val); - } while (ret != val); - get_mask = (get_mask & ~ret) >> nr; + + get_mask = ((1UL << nr_tags) - 1) << nr; + while (!atomic_long_try_cmpxchg(ptr, &val, + get_mask | val)) + ; + get_mask = (get_mask & ~val) >> nr; if (get_mask) { *offset = nr + (index << sb->shift); update_alloc_hint_after_get(sb, depth, hint, - *offset + map_tags - 1); + *offset + nr_tags - 1); return get_mask; } } +next: /* Jump to next index. */ if (++index >= sb->map_nr) index = 0; @@ -559,14 +564,14 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags, return 0; } -int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, - unsigned int shallow_depth) +int sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, + unsigned int shallow_depth) { WARN_ON_ONCE(shallow_depth < sbq->min_shallow_depth); return sbitmap_get_shallow(&sbq->sb, shallow_depth); } -EXPORT_SYMBOL_GPL(__sbitmap_queue_get_shallow); +EXPORT_SYMBOL_GPL(sbitmap_queue_get_shallow); void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq, unsigned int min_shallow_depth) @@ -576,74 +581,55 @@ void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq, } EXPORT_SYMBOL_GPL(sbitmap_queue_min_shallow_depth); -static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq) +static void __sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr) { - int i, wake_index; + int i, wake_index, woken; if (!atomic_read(&sbq->ws_active)) - return NULL; + return; wake_index = atomic_read(&sbq->wake_index); for (i = 0; i < SBQ_WAIT_QUEUES; i++) { struct sbq_wait_state *ws = &sbq->ws[wake_index]; + /* + * Advance the index before checking the current queue. + * It improves fairness, by ensuring the queue doesn't + * need to be fully emptied before trying to wake up + * from the next one. + */ + wake_index = sbq_index_inc(wake_index); + if (waitqueue_active(&ws->wait)) { - if (wake_index != atomic_read(&sbq->wake_index)) - atomic_set(&sbq->wake_index, wake_index); - return ws; + woken = wake_up_nr(&ws->wait, nr); + if (woken == nr) + break; + nr -= woken; } - - wake_index = sbq_index_inc(wake_index); } - return NULL; + if (wake_index != atomic_read(&sbq->wake_index)) + atomic_set(&sbq->wake_index, wake_index); } -static bool __sbq_wake_up(struct sbitmap_queue *sbq) +void sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr) { - struct sbq_wait_state *ws; - unsigned int wake_batch; - int wait_cnt; - - ws = sbq_wake_ptr(sbq); - if (!ws) - return false; - - wait_cnt = atomic_dec_return(&ws->wait_cnt); - if (wait_cnt <= 0) { - int ret; - - wake_batch = READ_ONCE(sbq->wake_batch); + unsigned int wake_batch = READ_ONCE(sbq->wake_batch); + unsigned int wakeups; - /* - * Pairs with the memory barrier in sbitmap_queue_resize() to - * ensure that we see the batch size update before the wait - * count is reset. - */ - smp_mb__before_atomic(); - - /* - * For concurrent callers of this, the one that failed the - * atomic_cmpxhcg() race should call this function again - * to wakeup a new batch on a different 'ws'. - */ - ret = atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wake_batch); - if (ret == wait_cnt) { - sbq_index_atomic_inc(&sbq->wake_index); - wake_up_nr(&ws->wait, wake_batch); - return false; - } + if (!atomic_read(&sbq->ws_active)) + return; - return true; - } + atomic_add(nr, &sbq->completion_cnt); + wakeups = atomic_read(&sbq->wakeup_cnt); - return false; -} + do { + if (atomic_read(&sbq->completion_cnt) - wakeups < wake_batch) + return; + } while (!atomic_try_cmpxchg(&sbq->wakeup_cnt, + &wakeups, wakeups + wake_batch)); -void sbitmap_queue_wake_up(struct sbitmap_queue *sbq) -{ - while (__sbq_wake_up(sbq)) - ; + __sbitmap_queue_wake_up(sbq, wake_batch); } EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up); @@ -682,7 +668,7 @@ void sbitmap_queue_clear_batch(struct sbitmap_queue *sbq, int offset, atomic_long_andnot(mask, (atomic_long_t *) addr); smp_mb__after_atomic(); - sbitmap_queue_wake_up(sbq); + sbitmap_queue_wake_up(sbq, nr_tags); sbitmap_update_cpu_hint(&sbq->sb, raw_smp_processor_id(), tags[nr_tags - 1] - offset); } @@ -710,7 +696,7 @@ void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, * waiter. See the comment on waitqueue_active(). */ smp_mb__after_atomic(); - sbitmap_queue_wake_up(sbq); + sbitmap_queue_wake_up(sbq, 1); sbitmap_update_cpu_hint(&sbq->sb, cpu, nr); } EXPORT_SYMBOL_GPL(sbitmap_queue_clear); @@ -760,9 +746,7 @@ void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m) seq_puts(m, "ws={\n"); for (i = 0; i < SBQ_WAIT_QUEUES; i++) { struct sbq_wait_state *ws = &sbq->ws[i]; - - seq_printf(m, "\t{.wait_cnt=%d, .wait=%s},\n", - atomic_read(&ws->wait_cnt), + seq_printf(m, "\t{.wait=%s},\n", waitqueue_active(&ws->wait) ? "active" : "inactive"); } seq_puts(m, "}\n"); |
