From 3301bc53358a0eb0a0db65fd7a513cd4cb50c83a Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 10 Jan 2022 15:29:45 +0800 Subject: lib/sbitmap: kill 'depth' from sbitmap_word Only the last sbitmap_word can have different depth, and all the others must have same depth of 1U << sb->shift, so not necessary to store it in sbitmap_word, and it can be retrieved easily and efficiently by adding one internal helper of __map_depth(sb, index). Remove 'depth' field from sbitmap_word, then the annotation of ____cacheline_aligned_in_smp for 'word' isn't needed any more. Not see performance effect when running high parallel IOPS test on null_blk. This way saves us one cacheline(usually 64 words) per each sbitmap_word. Cc: Martin Wilck Signed-off-by: Ming Lei Reviewed-by: Martin Wilck Reviewed-by: John Garry Link: https://lore.kernel.org/r/20220110072945.347535-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- lib/sbitmap.c | 34 ++++++++++++++-------------------- 1 file changed, 14 insertions(+), 20 deletions(-) (limited to 'lib/sbitmap.c') diff --git a/lib/sbitmap.c b/lib/sbitmap.c index 09d293c30fd2..b7cb96ae4701 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -85,7 +85,6 @@ int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, bool alloc_hint) { unsigned int bits_per_word; - unsigned int i; if (shift < 0) shift = sbitmap_calculate_shift(depth); @@ -117,10 +116,6 @@ int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, return -ENOMEM; } - for (i = 0; i < sb->map_nr; i++) { - sb->map[i].depth = min(depth, bits_per_word); - depth -= sb->map[i].depth; - } return 0; } EXPORT_SYMBOL_GPL(sbitmap_init_node); @@ -135,11 +130,6 @@ void sbitmap_resize(struct sbitmap *sb, unsigned int depth) sb->depth = depth; sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word); - - for (i = 0; i < sb->map_nr; i++) { - sb->map[i].depth = min(depth, bits_per_word); - depth -= sb->map[i].depth; - } } EXPORT_SYMBOL_GPL(sbitmap_resize); @@ -184,8 +174,8 @@ static int sbitmap_find_bit_in_index(struct sbitmap *sb, int index, int nr; do { - nr = __sbitmap_get_word(&map->word, map->depth, alloc_hint, - !sb->round_robin); + nr = __sbitmap_get_word(&map->word, __map_depth(sb, index), + alloc_hint, !sb->round_robin); if (nr != -1) break; if (!sbitmap_deferred_clear(map)) @@ -257,7 +247,9 @@ static int __sbitmap_get_shallow(struct sbitmap *sb, for (i = 0; i < sb->map_nr; i++) { again: nr = __sbitmap_get_word(&sb->map[index].word, - min(sb->map[index].depth, shallow_depth), + min_t(unsigned int, + __map_depth(sb, index), + shallow_depth), SB_NR_TO_BIT(sb, alloc_hint), true); if (nr != -1) { nr += index << sb->shift; @@ -315,11 +307,12 @@ static unsigned int __sbitmap_weight(const struct sbitmap *sb, bool set) for (i = 0; i < sb->map_nr; i++) { const struct sbitmap_word *word = &sb->map[i]; + unsigned int word_depth = __map_depth(sb, i); if (set) - weight += bitmap_weight(&word->word, word->depth); + weight += bitmap_weight(&word->word, word_depth); else - weight += bitmap_weight(&word->cleared, word->depth); + weight += bitmap_weight(&word->cleared, word_depth); } return weight; } @@ -367,7 +360,7 @@ void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m) for (i = 0; i < sb->map_nr; i++) { unsigned long word = READ_ONCE(sb->map[i].word); unsigned long cleared = READ_ONCE(sb->map[i].cleared); - unsigned int word_bits = READ_ONCE(sb->map[i].depth); + unsigned int word_bits = __map_depth(sb, i); word &= ~cleared; @@ -531,15 +524,16 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags, for (i = 0; i < sb->map_nr; i++) { struct sbitmap_word *map = &sb->map[index]; unsigned long get_mask; + unsigned int map_depth = __map_depth(sb, index); sbitmap_deferred_clear(map); - if (map->word == (1UL << (map->depth - 1)) - 1) + if (map->word == (1UL << (map_depth - 1)) - 1) continue; - nr = find_first_zero_bit(&map->word, map->depth); - if (nr + nr_tags <= map->depth) { + nr = find_first_zero_bit(&map->word, map_depth); + if (nr + nr_tags <= map_depth) { atomic_long_t *ptr = (atomic_long_t *) &map->word; - int map_tags = min_t(int, nr_tags, map->depth); + int map_tags = min_t(int, nr_tags, map_depth); unsigned long val, ret; get_mask = ((1UL << map_tags) - 1) << nr; -- cgit From 3f607293b74d6acb06571a774a500143c1f0ed2c Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 8 Feb 2022 20:07:04 +0800 Subject: sbitmap: Delete old sbitmap_queue_get_shallow() Since __sbitmap_queue_get_shallow() was introduced in commit c05e66733788 ("sbitmap: add sbitmap_get_shallow() operation"), it has not been used. Delete __sbitmap_queue_get_shallow() and rename public __sbitmap_queue_get_shallow() -> sbitmap_queue_get_shallow() as it is odd to have public __foo but no foo at all. Signed-off-by: John Garry Link: https://lore.kernel.org/r/1644322024-105340-1-git-send-email-john.garry@huawei.com Signed-off-by: Jens Axboe --- lib/sbitmap.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib/sbitmap.c') diff --git a/lib/sbitmap.c b/lib/sbitmap.c index b7cb96ae4701..2eb3de18ded3 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -557,14 +557,14 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags, return 0; } -int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, - unsigned int shallow_depth) +int sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, + unsigned int shallow_depth) { WARN_ON_ONCE(shallow_depth < sbq->min_shallow_depth); return sbitmap_get_shallow(&sbq->sb, shallow_depth); } -EXPORT_SYMBOL_GPL(__sbitmap_queue_get_shallow); +EXPORT_SYMBOL_GPL(sbitmap_queue_get_shallow); void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq, unsigned int min_shallow_depth) -- cgit From 863a66cdb4df25fd146d9851c3289072298566d5 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 16 Mar 2022 09:27:08 +0800 Subject: lib/sbitmap: allocate sb->map via kvzalloc_node sbitmap has been used in scsi for replacing atomic operations on sdev->device_busy, so IOPS on some fast scsi storage can be improved. However, sdev->device_busy can be changed in fast path, so we have to allocate the sb->map statically. sdev->device_busy has been capped to 1024, but some drivers may configure the default depth as < 8, then cause each sbitmap word to hold only one bit. Finally 1024 * 128( sizeof(sbitmap_word)) bytes is needed for sb->map, given it is order 5 allocation, sometimes it may fail. Avoid the issue by using kvzalloc_node() for allocating sb->map. Cc: Ewan D. Milne Signed-off-by: Ming Lei Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20220316012708.354668-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- lib/sbitmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/sbitmap.c') diff --git a/lib/sbitmap.c b/lib/sbitmap.c index 2eb3de18ded3..ae4fd4de9ebe 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -110,7 +110,7 @@ int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, sb->alloc_hint = NULL; } - sb->map = kcalloc_node(sb->map_nr, sizeof(*sb->map), flags, node); + sb->map = kvzalloc_node(sb->map_nr * sizeof(*sb->map), flags, node); if (!sb->map) { free_percpu(sb->alloc_hint); return -ENOMEM; -- cgit From fbb564a557809466c171b95f8d593a0972450ff2 Mon Sep 17 00:00:00 2001 From: wuchi Date: Sun, 5 Jun 2022 22:58:35 +0800 Subject: lib/sbitmap: Fix invalid loop in __sbitmap_queue_get_batch() 1. Getting next index before continue branch. 2. Checking free bits when setting the target bits. Otherwise, it may reuse the busying bits. Signed-off-by: wuchi Reviewed-by: Martin Wilck Link: https://lore.kernel.org/r/20220605145835.26916-1-wuchi.zero@gmail.com Fixes: 9672b0d43782 ("sbitmap: add __sbitmap_queue_get_batch()") Signed-off-by: Jens Axboe --- lib/sbitmap.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'lib/sbitmap.c') diff --git a/lib/sbitmap.c b/lib/sbitmap.c index ae4fd4de9ebe..29eb0484215a 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -528,7 +528,7 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags, sbitmap_deferred_clear(map); if (map->word == (1UL << (map_depth - 1)) - 1) - continue; + goto next; nr = find_first_zero_bit(&map->word, map_depth); if (nr + nr_tags <= map_depth) { @@ -539,6 +539,8 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags, get_mask = ((1UL << map_tags) - 1) << nr; do { val = READ_ONCE(map->word); + if ((val & ~get_mask) != val) + goto next; ret = atomic_long_cmpxchg(ptr, val, get_mask | val); } while (ret != val); get_mask = (get_mask & ~ret) >> nr; @@ -549,6 +551,7 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags, return get_mask; } } +next: /* Jump to next index. */ if (++index >= sb->map_nr) index = 0; -- cgit