Diffstat (limited to 'drivers/md/dm-bufio.c')
| -rw-r--r-- | drivers/md/dm-bufio.c | 451 |
1 file changed, 216 insertions, 235 deletions
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index bc309e41d074..e6d28be11c5c 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -41,16 +41,6 @@
 #define DM_BUFIO_LOW_WATERMARK_RATIO	16
 
 /*
- * Check buffer ages in this interval (seconds)
- */
-#define DM_BUFIO_WORK_TIMER_SECS	30
-
-/*
- * Free buffers when they are older than this (seconds)
- */
-#define DM_BUFIO_DEFAULT_AGE_SECS	300
-
-/*
  * The nr of bytes of cached data to keep around.
  */
 #define DM_BUFIO_DEFAULT_RETAIN_BYTES	(256 * 1024)
@@ -68,6 +58,8 @@
 #define LIST_DIRTY	1
 #define LIST_SIZE	2
 
+#define SCAN_RESCHED_CYCLE	16
+
 /*--------------------------------------------------------------*/
 
 /*
@@ -254,7 +246,7 @@ enum evict_result {
 
 typedef enum evict_result (*le_predicate)(struct lru_entry *le, void *context);
 
-static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *context)
+static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *context, bool no_sleep)
 {
 	unsigned long tested = 0;
 	struct list_head *h = lru->cursor;
@@ -295,7 +287,8 @@ static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *con
 
 		h = h->next;
 
-		cond_resched();
+		if (!no_sleep)
+			cond_resched();
 	}
 
 	return NULL;
@@ -317,9 +310,10 @@ static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *con
  */
 enum data_mode {
 	DATA_MODE_SLAB = 0,
-	DATA_MODE_GET_FREE_PAGES = 1,
-	DATA_MODE_VMALLOC = 2,
-	DATA_MODE_LIMIT = 3
+	DATA_MODE_KMALLOC = 1,
+	DATA_MODE_GET_FREE_PAGES = 2,
+	DATA_MODE_VMALLOC = 3,
+	DATA_MODE_LIMIT = 4
 };
 
 struct dm_buffer {
@@ -382,7 +376,10 @@ struct dm_buffer {
  */
 struct buffer_tree {
-	struct rw_semaphore lock;
+	union {
+		struct rw_semaphore lock;
+		rwlock_t spinlock;
+	} u;
 	struct rb_root root;
 } ____cacheline_aligned_in_smp;
 
@@ -393,9 +390,12 @@ struct dm_buffer_cache {
 	 * on the locks.
	 */
 	unsigned int num_locks;
+	bool no_sleep;
 	struct buffer_tree trees[];
 };
 
+static DEFINE_STATIC_KEY_FALSE(no_sleep_enabled);
+
 static inline unsigned int cache_index(sector_t block, unsigned int num_locks)
 {
 	return dm_hash_locks_index(block, num_locks);
@@ -403,22 +403,34 @@ static inline unsigned int cache_index(sector_t block, unsigned int num_locks)
 
 static inline void cache_read_lock(struct dm_buffer_cache *bc, sector_t block)
 {
-	down_read(&bc->trees[cache_index(block, bc->num_locks)].lock);
+	if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
+		read_lock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
+	else
+		down_read(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
 }
 
 static inline void cache_read_unlock(struct dm_buffer_cache *bc, sector_t block)
 {
-	up_read(&bc->trees[cache_index(block, bc->num_locks)].lock);
+	if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
+		read_unlock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
+	else
+		up_read(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
 }
 
 static inline void cache_write_lock(struct dm_buffer_cache *bc, sector_t block)
 {
-	down_write(&bc->trees[cache_index(block, bc->num_locks)].lock);
+	if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
+		write_lock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
+	else
+		down_write(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
 }
 
 static inline void cache_write_unlock(struct dm_buffer_cache *bc, sector_t block)
 {
-	up_write(&bc->trees[cache_index(block, bc->num_locks)].lock);
+	if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
+		write_unlock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
+	else
+		up_write(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
 }
 
 /*
@@ -442,18 +454,32 @@ static void lh_init(struct lock_history *lh, struct dm_buffer_cache *cache, bool
 
 static void __lh_lock(struct lock_history *lh, unsigned int index)
 {
-	if (lh->write)
-		down_write(&lh->cache->trees[index].lock);
-	else
-		down_read(&lh->cache->trees[index].lock);
+	if (lh->write) {
+		if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
+			write_lock_bh(&lh->cache->trees[index].u.spinlock);
+		else
+			down_write(&lh->cache->trees[index].u.lock);
+	} else {
+		if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
+			read_lock_bh(&lh->cache->trees[index].u.spinlock);
+		else
+			down_read(&lh->cache->trees[index].u.lock);
+	}
 }
 
 static void __lh_unlock(struct lock_history *lh, unsigned int index)
 {
-	if (lh->write)
-		up_write(&lh->cache->trees[index].lock);
-	else
-		up_read(&lh->cache->trees[index].lock);
+	if (lh->write) {
+		if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
+			write_unlock_bh(&lh->cache->trees[index].u.spinlock);
+		else
+			up_write(&lh->cache->trees[index].u.lock);
+	} else {
+		if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
+			read_unlock_bh(&lh->cache->trees[index].u.spinlock);
+		else
+			up_read(&lh->cache->trees[index].u.lock);
+	}
 }
 
 /*
@@ -496,20 +522,21 @@ static struct dm_buffer *list_to_buffer(struct list_head *l)
 {
 	struct lru_entry *le = list_entry(l, struct lru_entry, list);
 
-	if (!le)
-		return NULL;
-
 	return le_to_buffer(le);
 }
 
-static void cache_init(struct dm_buffer_cache *bc, unsigned int num_locks)
+static void cache_init(struct dm_buffer_cache *bc, unsigned int num_locks, bool no_sleep)
 {
 	unsigned int i;
 
 	bc->num_locks = num_locks;
+	bc->no_sleep = no_sleep;
 
 	for (i = 0; i < bc->num_locks; i++) {
-		init_rwsem(&bc->trees[i].lock);
+		if (no_sleep)
+			rwlock_init(&bc->trees[i].u.spinlock);
+		else
+			init_rwsem(&bc->trees[i].u.lock);
 		bc->trees[i].root = RB_ROOT;
 	}
 
@@ -648,7 +675,7 @@ static struct dm_buffer *__cache_evict(struct dm_buffer_cache *bc, int list_mode
 	struct lru_entry *le;
 	struct dm_buffer *b;
 
-	le = lru_evict(&bc->lru[list_mode], __evict_pred, &w);
+	le = lru_evict(&bc->lru[list_mode], __evict_pred, &w, bc->no_sleep);
 	if (!le)
 		return NULL;
 
@@ -702,7 +729,7 @@ static void __cache_mark_many(struct dm_buffer_cache *bc, int old_mode, int new_
 	struct evict_wrapper w = {.lh = lh, .pred = pred, .context = context};
 
 	while (true) {
-		le = lru_evict(&bc->lru[old_mode], __evict_pred, &w);
+		le = lru_evict(&bc->lru[old_mode], __evict_pred, &w, bc->no_sleep);
 		if (!le)
 			break;
 
@@ -915,10 +942,11 @@ static void cache_remove_range(struct dm_buffer_cache *bc,
 {
 	unsigned int i;
 
+	BUG_ON(bc->no_sleep);
 	for (i = 0; i < bc->num_locks; i++) {
-		down_write(&bc->trees[i].lock);
+		down_write(&bc->trees[i].u.lock);
 		__remove_range(bc, &bc->trees[i].root, begin, end, pred, release);
-		up_write(&bc->trees[i].lock);
+		up_write(&bc->trees[i].u.lock);
 	}
 }
 
@@ -963,7 +991,7 @@ struct dm_bufio_client {
 
 	sector_t start;
 
-	struct shrinker shrinker;
+	struct shrinker *shrinker;
 	struct work_struct shrink_work;
 	atomic_long_t need_shrink;
 
@@ -979,8 +1007,6 @@ struct dm_bufio_client {
 	struct dm_buffer_cache cache; /* must be last member */
 };
 
-static DEFINE_STATIC_KEY_FALSE(no_sleep_enabled);
-
 /*----------------------------------------------------------------*/
 
 #define dm_bufio_in_request()	(!!current->bio_list)
@@ -1021,14 +1047,13 @@ static unsigned long dm_bufio_cache_size_latch;
 
 static DEFINE_SPINLOCK(global_spinlock);
 
-/*
- * Buffers are freed after this timeout
- */
-static unsigned int dm_bufio_max_age = DM_BUFIO_DEFAULT_AGE_SECS;
+static unsigned int dm_bufio_max_age; /* No longer does anything */
+
 static unsigned long dm_bufio_retain_bytes = DM_BUFIO_DEFAULT_RETAIN_BYTES;
 
 static unsigned long dm_bufio_peak_allocated;
 static unsigned long dm_bufio_allocated_kmem_cache;
+static unsigned long dm_bufio_allocated_kmalloc;
 static unsigned long dm_bufio_allocated_get_free_pages;
 static unsigned long dm_bufio_allocated_vmalloc;
 static unsigned long dm_bufio_current_allocated;
@@ -1051,7 +1076,6 @@ static LIST_HEAD(dm_bufio_all_clients);
 static DEFINE_MUTEX(dm_bufio_clients_lock);
 
 static struct workqueue_struct *dm_bufio_wq;
-static struct delayed_work dm_bufio_cleanup_old_work;
 static struct work_struct dm_bufio_replacement_work;
 
 @@ -1071,6 +1095,7 @@ static void adjust_total_allocated(struct dm_buffer *b, bool unlink)
 
 	static unsigned long * const class_ptr[DATA_MODE_LIMIT] = {
 		&dm_bufio_allocated_kmem_cache,
+		&dm_bufio_allocated_kmalloc,
 		&dm_bufio_allocated_get_free_pages,
 		&dm_bufio_allocated_vmalloc,
 	};
@@ -1134,7 +1159,7 @@ static void __cache_size_refresh(void)
  * If the allocation may fail we use __get_free_pages. Memory fragmentation
  * won't have a fatal effect here, but it just causes flushes of some other
  * buffers and more I/O will be performed. Don't use __get_free_pages if it
- * always fails (i.e. order > MAX_ORDER).
+ * always fails (i.e. order > MAX_PAGE_ORDER).
  *
 * If the allocation shouldn't fail we use __vmalloc. This is only for the
 * initial reserve allocation, so there's no risk of wasting all vmalloc
@@ -1148,6 +1173,11 @@ static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask,
 		return kmem_cache_alloc(c->slab_cache, gfp_mask);
 	}
 
+	if (unlikely(c->block_size < PAGE_SIZE)) {
+		*data_mode = DATA_MODE_KMALLOC;
+		return kmalloc(c->block_size, gfp_mask | __GFP_RECLAIMABLE);
+	}
+
 	if (c->block_size <= KMALLOC_MAX_SIZE &&
 	    gfp_mask & __GFP_NORETRY) {
 		*data_mode = DATA_MODE_GET_FREE_PAGES;
@@ -1171,6 +1201,10 @@ static void free_buffer_data(struct dm_bufio_client *c,
 		kmem_cache_free(c->slab_cache, data);
 		break;
 
+	case DATA_MODE_KMALLOC:
+		kfree(data);
+		break;
+
 	case DATA_MODE_GET_FREE_PAGES:
 		free_pages((unsigned long)data,
 			   c->sectors_per_block_bits - (PAGE_SHIFT - SECTOR_SHIFT));
@@ -1256,7 +1290,8 @@ static void dmio_complete(unsigned long error, void *context)
 }
 
 static void use_dmio(struct dm_buffer *b, enum req_op op, sector_t sector,
-		     unsigned int n_sectors, unsigned int offset)
+		     unsigned int n_sectors, unsigned int offset,
+		     unsigned short ioprio)
 {
 	int r;
 	struct dm_io_request io_req = {
@@ -1279,7 +1314,7 @@ static void use_dmio(struct dm_buffer *b, enum req_op op, sector_t sector,
 		io_req.mem.ptr.vma = (char *)b->data + offset;
 	}
 
-	r = dm_io(&io_req, 1, &region, NULL);
+	r = dm_io(&io_req, 1, &region, NULL, ioprio);
 	if (unlikely(r))
 		b->end_io(b, errno_to_blk_status(r));
 }
@@ -1295,26 +1330,28 @@ static void bio_complete(struct bio *bio)
 }
 
 static void use_bio(struct dm_buffer *b, enum req_op op, sector_t sector,
-		    unsigned int n_sectors, unsigned int offset)
+		    unsigned int n_sectors, unsigned int offset,
+		    unsigned short ioprio)
 {
 	struct bio *bio;
 	char *ptr;
 	unsigned int len;
 
-	bio = bio_kmalloc(1, GFP_NOWAIT | __GFP_NORETRY | __GFP_NOWARN);
+	bio = bio_kmalloc(1, GFP_NOWAIT);
 	if (!bio) {
-		use_dmio(b, op, sector, n_sectors, offset);
+		use_dmio(b, op, sector, n_sectors, offset, ioprio);
 		return;
 	}
 
-	bio_init(bio, b->c->bdev, bio->bi_inline_vecs, 1, op);
+	bio_init_inline(bio, b->c->bdev, 1, op);
 	bio->bi_iter.bi_sector = sector;
 	bio->bi_end_io = bio_complete;
 	bio->bi_private = b;
+	bio->bi_ioprio = ioprio;
 
 	ptr = (char *)b->data + offset;
 	len = n_sectors << SECTOR_SHIFT;
 
-	__bio_add_page(bio, virt_to_page(ptr), len, offset_in_page(ptr));
+	bio_add_virt_nofail(bio, ptr, len);
 
 	submit_bio(bio);
 }
@@ -1332,7 +1369,7 @@ static inline sector_t block_to_sector(struct dm_bufio_client *c, sector_t block
 
 	return sector;
 }
 
-static void submit_io(struct dm_buffer *b, enum req_op op,
+static void submit_io(struct dm_buffer *b, enum req_op op, unsigned short ioprio,
 		      void (*end_io)(struct dm_buffer *, blk_status_t))
 {
 	unsigned int n_sectors;
@@ -1362,9 +1399,9 @@ static void submit_io(struct dm_buffer *b, enum req_op op,
 	}
 
 	if (b->data_mode != DATA_MODE_VMALLOC)
-		use_bio(b, op, sector, n_sectors, offset);
+		use_bio(b, op, sector, n_sectors, offset, ioprio);
 	else
-		use_dmio(b, op, sector, n_sectors, offset);
+		use_dmio(b, op, sector, n_sectors, offset, ioprio);
 }
 
 /*
@@ -1420,7 +1457,7 @@ static void __write_dirty_buffer(struct dm_buffer *b,
 	b->write_end = b->dirty_end;
 
 	if (!write_list)
-		submit_io(b, REQ_OP_WRITE, write_endio);
+		submit_io(b, REQ_OP_WRITE, IOPRIO_DEFAULT, write_endio);
 	else
 		list_add_tail(&b->write_list, write_list);
 }
@@ -1434,7 +1471,7 @@ static void __flush_write_list(struct list_head *write_list)
 		struct dm_buffer *b =
 			list_entry(write_list->next, struct dm_buffer, write_list);
 		list_del(&b->write_list);
-		submit_io(b, REQ_OP_WRITE, write_endio);
+		submit_io(b, REQ_OP_WRITE, IOPRIO_DEFAULT, write_endio);
		cond_resched();
 	}
 	blk_finish_plug(&plug);
@@ -1564,18 +1601,18 @@ static struct dm_buffer *__alloc_buffer_wait_no_callback(struct dm_bufio_client
 	 * dm-bufio is resistant to allocation failures (it just keeps
 	 * one buffer reserved in cases all the allocations fail).
 	 * So set flags to not try too hard:
-	 *	GFP_NOWAIT: don't wait; if we need to sleep we'll release our
-	 *		    mutex and wait ourselves.
+	 *	GFP_NOWAIT: don't wait and don't print a warning in case of
+	 *		    failure; if we need to sleep we'll release our mutex
+	 *		    and wait ourselves.
 	 *	__GFP_NORETRY: don't retry and rather return failure
 	 *	__GFP_NOMEMALLOC: don't use emergency reserves
-	 *	__GFP_NOWARN: don't print a warning in case of failure
 	 *
 	 * For debugging, if we set the cache size to 1, no new buffers will
 	 * be allocated.
 	 */
 	while (1) {
 		if (dm_bufio_cache_size_latch != 1) {
-			b = alloc_buffer(c, GFP_NOWAIT | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
+			b = alloc_buffer(c, GFP_NOWAIT | __GFP_NORETRY | __GFP_NOMEMALLOC);
 			if (b)
 				return b;
 		}
@@ -1816,7 +1853,8 @@ static void read_endio(struct dm_buffer *b, blk_status_t status)
  * and uses dm_bufio_mark_buffer_dirty to write new data back).
  */
 static void *new_read(struct dm_bufio_client *c, sector_t block,
-		      enum new_flag nf, struct dm_buffer **bp)
+		      enum new_flag nf, struct dm_buffer **bp,
+		      unsigned short ioprio)
 {
 	int need_submit = 0;
 	struct dm_buffer *b;
@@ -1869,9 +1907,10 @@ static void *new_read(struct dm_bufio_client *c, sector_t block,
 		return NULL;
 
 	if (need_submit)
-		submit_io(b, REQ_OP_READ, read_endio);
+		submit_io(b, REQ_OP_READ, ioprio, read_endio);
 
-	wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
+	if (nf != NF_GET)	/* we already tested this condition above */
+		wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
 
 	if (b->read_error) {
 		int error = blk_status_to_errno(b->read_error);
@@ -1889,32 +1928,46 @@ static void *new_read(struct dm_bufio_client *c, sector_t block,
 void *dm_bufio_get(struct dm_bufio_client *c, sector_t block,
 		   struct dm_buffer **bp)
 {
-	return new_read(c, block, NF_GET, bp);
+	return new_read(c, block, NF_GET, bp, IOPRIO_DEFAULT);
 }
 EXPORT_SYMBOL_GPL(dm_bufio_get);
 
-void *dm_bufio_read(struct dm_bufio_client *c, sector_t block,
-		    struct dm_buffer **bp)
+static void *__dm_bufio_read(struct dm_bufio_client *c, sector_t block,
+			     struct dm_buffer **bp, unsigned short ioprio)
 {
 	if (WARN_ON_ONCE(dm_bufio_in_request()))
 		return ERR_PTR(-EINVAL);
 
-	return new_read(c, block, NF_READ, bp);
+	return new_read(c, block, NF_READ, bp, ioprio);
+}
+
+void *dm_bufio_read(struct dm_bufio_client *c, sector_t block,
+		    struct dm_buffer **bp)
+{
+	return __dm_bufio_read(c, block, bp, IOPRIO_DEFAULT);
 }
 EXPORT_SYMBOL_GPL(dm_bufio_read);
 
+void *dm_bufio_read_with_ioprio(struct dm_bufio_client *c, sector_t block,
+				struct dm_buffer **bp, unsigned short ioprio)
+{
+	return __dm_bufio_read(c, block, bp, ioprio);
+}
+EXPORT_SYMBOL_GPL(dm_bufio_read_with_ioprio);
+
 void *dm_bufio_new(struct dm_bufio_client *c, sector_t block,
 		   struct dm_buffer **bp)
 {
 	if (WARN_ON_ONCE(dm_bufio_in_request()))
 		return ERR_PTR(-EINVAL);
 
-	return new_read(c, block, NF_FRESH, bp);
+	return new_read(c, block, NF_FRESH, bp, IOPRIO_DEFAULT);
 }
 EXPORT_SYMBOL_GPL(dm_bufio_new);
 
-void dm_bufio_prefetch(struct dm_bufio_client *c,
-		       sector_t block, unsigned int n_blocks)
+static void __dm_bufio_prefetch(struct dm_bufio_client *c,
+				sector_t block, unsigned int n_blocks,
+				unsigned short ioprio)
 {
 	struct blk_plug plug;
 
@@ -1950,7 +2003,7 @@ void dm_bufio_prefetch(struct dm_bufio_client *c,
			dm_bufio_unlock(c);
 
 			if (need_submit)
-				submit_io(b, REQ_OP_READ, read_endio);
+				submit_io(b, REQ_OP_READ, ioprio, read_endio);
 			dm_bufio_release(b);
 
 			cond_resched();
@@ -1965,8 +2018,20 @@ void dm_bufio_prefetch(struct dm_bufio_client *c,
 flush_plug:
 	blk_finish_plug(&plug);
 }
+
+void dm_bufio_prefetch(struct dm_bufio_client *c, sector_t block, unsigned int n_blocks)
+{
+	return __dm_bufio_prefetch(c, block, n_blocks, IOPRIO_DEFAULT);
+}
 EXPORT_SYMBOL_GPL(dm_bufio_prefetch);
 
+void dm_bufio_prefetch_with_ioprio(struct dm_bufio_client *c, sector_t block,
+				   unsigned int n_blocks, unsigned short ioprio)
+{
+	return __dm_bufio_prefetch(c, block, n_blocks, ioprio);
+}
+EXPORT_SYMBOL_GPL(dm_bufio_prefetch_with_ioprio);
+
 void dm_bufio_release(struct dm_buffer *b)
 {
 	struct dm_bufio_client *c = b->c;
@@ -2130,7 +2195,7 @@ int dm_bufio_issue_flush(struct dm_bufio_client *c)
 	if (WARN_ON_ONCE(dm_bufio_in_request()))
 		return -EINVAL;
 
-	return dm_io(&io_req, 1, &io_reg, NULL);
+	return dm_io(&io_req, 1, &io_reg, NULL, IOPRIO_DEFAULT);
 }
 EXPORT_SYMBOL_GPL(dm_bufio_issue_flush);
 
@@ -2154,11 +2219,11 @@ int dm_bufio_issue_discard(struct dm_bufio_client *c, sector_t block, sector_t c
 	if (WARN_ON_ONCE(dm_bufio_in_request()))
 		return -EINVAL; /* discards are optional */
 
-	return dm_io(&io_req, 1, &io_reg, NULL);
+	return dm_io(&io_req, 1, &io_reg, NULL, IOPRIO_DEFAULT);
 }
 EXPORT_SYMBOL_GPL(dm_bufio_issue_discard);
 
-static bool forget_buffer(struct dm_bufio_client *c, sector_t block)
+static void forget_buffer(struct dm_bufio_client *c, sector_t block)
 {
 	struct dm_buffer *b;
 
@@ -2173,8 +2238,6 @@ static bool forget_buffer(struct dm_bufio_client *c, sector_t block)
 			cache_put_and_wake(c, b);
 		}
 	}
-
-	return b ? true : false;
 }
 
 /*
@@ -2350,7 +2413,12 @@ static void __scan(struct dm_bufio_client *c)
 
 			atomic_long_dec(&c->need_shrink);
 			freed++;
-			cond_resched();
+
+			if (unlikely(freed % SCAN_RESCHED_CYCLE == 0)) {
+				dm_bufio_unlock(c);
+				cond_resched();
+				dm_bufio_lock(c);
+			}
 		}
 	}
 }
@@ -2368,7 +2436,7 @@ static unsigned long dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink
 {
 	struct dm_bufio_client *c;
 
-	c = container_of(shrink, struct dm_bufio_client, shrinker);
+	c = shrink->private_data;
 	atomic_long_add(sc->nr_to_scan, &c->need_shrink);
 	queue_work(dm_bufio_wq, &c->shrink_work);
 
@@ -2377,7 +2445,7 @@ static unsigned long dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink
 
 static unsigned long dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
 {
-	struct dm_bufio_client *c = container_of(shrink, struct dm_bufio_client, shrinker);
+	struct dm_bufio_client *c = shrink->private_data;
 	unsigned long count = cache_total(&c->cache);
 	unsigned long retain_target = get_retain_buffers(c);
 	unsigned long queued_for_cleanup = atomic_long_read(&c->need_shrink);
@@ -2407,7 +2475,8 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
 	int r;
 	unsigned int num_locks;
 	struct dm_bufio_client *c;
-	char slab_name[27];
+	char slab_name[64];
+	static atomic_t seqno = ATOMIC_INIT(0);
 
 	if (!block_size || block_size & ((1 << SECTOR_SHIFT) - 1)) {
 		DMERR("%s: block size not specified or is not multiple of 512b", __func__);
@@ -2421,7 +2490,7 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
 		r = -ENOMEM;
 		goto bad_client;
 	}
-	cache_init(&c->cache, num_locks);
+	cache_init(&c->cache, num_locks, (flags & DM_BUFIO_CLIENT_NO_SLEEP) != 0);
 
 	c->bdev = bdev;
 	c->block_size = block_size;
@@ -2454,11 +2523,11 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
		goto bad_dm_io;
 	}
 
-	if (block_size <= KMALLOC_MAX_SIZE &&
-	    (block_size < PAGE_SIZE || !is_power_of_2(block_size))) {
+	if (block_size <= KMALLOC_MAX_SIZE && !is_power_of_2(block_size)) {
 		unsigned int align = min(1U << __ffs(block_size), (unsigned int)PAGE_SIZE);
 
-		snprintf(slab_name, sizeof(slab_name), "dm_bufio_cache-%u", block_size);
+		snprintf(slab_name, sizeof(slab_name), "dm_bufio_cache-%u-%u",
+			 block_size, atomic_inc_return(&seqno));
 		c->slab_cache = kmem_cache_create(slab_name, block_size, align,
 						  SLAB_RECLAIM_ACCOUNT, NULL);
 		if (!c->slab_cache) {
@@ -2467,9 +2536,11 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
 		}
 	}
 	if (aux_size)
-		snprintf(slab_name, sizeof(slab_name), "dm_bufio_buffer-%u", aux_size);
+		snprintf(slab_name, sizeof(slab_name), "dm_bufio_buffer-%u-%u",
+			 aux_size, atomic_inc_return(&seqno));
 	else
-		snprintf(slab_name, sizeof(slab_name), "dm_bufio_buffer");
+		snprintf(slab_name, sizeof(slab_name), "dm_bufio_buffer-%u",
+			 atomic_inc_return(&seqno));
 	c->slab_buffer = kmem_cache_create(slab_name, sizeof(struct dm_buffer) + aux_size,
 					   0, SLAB_RECLAIM_ACCOUNT, NULL);
 	if (!c->slab_buffer) {
@@ -2490,14 +2561,20 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
 	INIT_WORK(&c->shrink_work, shrink_work);
 	atomic_long_set(&c->need_shrink, 0);
 
-	c->shrinker.count_objects = dm_bufio_shrink_count;
-	c->shrinker.scan_objects = dm_bufio_shrink_scan;
-	c->shrinker.seeks = 1;
-	c->shrinker.batch = 0;
-	r = register_shrinker(&c->shrinker, "dm-bufio:(%u:%u)",
-			      MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev));
-	if (r)
+	c->shrinker = shrinker_alloc(0, "dm-bufio:(%u:%u)",
+				     MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev));
+	if (!c->shrinker) {
+		r = -ENOMEM;
 		goto bad;
+	}
+
+	c->shrinker->count_objects = dm_bufio_shrink_count;
+	c->shrinker->scan_objects = dm_bufio_shrink_scan;
+	c->shrinker->seeks = 1;
+	c->shrinker->batch = 0;
+	c->shrinker->private_data = c;
+
+	shrinker_register(c->shrinker);
 
 	mutex_lock(&dm_bufio_clients_lock);
 	dm_bufio_client_count++;
@@ -2537,7 +2614,7 @@ void dm_bufio_client_destroy(struct dm_bufio_client *c)
 
 	drop_buffers(c);
 
-	unregister_shrinker(&c->shrinker);
+	shrinker_free(c->shrinker);
 	flush_work(&c->shrink_work);
 
 	mutex_lock(&dm_bufio_clients_lock);
@@ -2590,130 +2667,6 @@ EXPORT_SYMBOL_GPL(dm_bufio_set_sector_offset);
 
 /*--------------------------------------------------------------*/
 
-static unsigned int get_max_age_hz(void)
-{
-	unsigned int max_age = READ_ONCE(dm_bufio_max_age);
-
-	if (max_age > UINT_MAX / HZ)
-		max_age = UINT_MAX / HZ;
-
-	return max_age * HZ;
-}
-
-static bool older_than(struct dm_buffer *b, unsigned long age_hz)
-{
-	return time_after_eq(jiffies, READ_ONCE(b->last_accessed) + age_hz);
-}
-
-struct evict_params {
-	gfp_t gfp;
-	unsigned long age_hz;
-
-	/*
-	 * This gets updated with the largest last_accessed (ie. most
-	 * recently used) of the evicted buffers. It will not be reinitialised
-	 * by __evict_many(), so you can use it across multiple invocations.
-	 */
-	unsigned long last_accessed;
-};
-
-/*
- * We may not be able to evict this buffer if IO pending or the client
- * is still using it.
- *
- * And if GFP_NOFS is used, we must not do any I/O because we hold
- * dm_bufio_clients_lock and we would risk deadlock if the I/O gets
- * rerouted to different bufio client.
- */
-static enum evict_result select_for_evict(struct dm_buffer *b, void *context)
-{
-	struct evict_params *params = context;
-
-	if (!(params->gfp & __GFP_FS) ||
-	    (static_branch_unlikely(&no_sleep_enabled) && b->c->no_sleep)) {
-		if (test_bit_acquire(B_READING, &b->state) ||
-		    test_bit(B_WRITING, &b->state) ||
-		    test_bit(B_DIRTY, &b->state))
-			return ER_DONT_EVICT;
-	}
-
-	return older_than(b, params->age_hz) ? ER_EVICT : ER_STOP;
-}
-
-static unsigned long __evict_many(struct dm_bufio_client *c,
-				  struct evict_params *params,
-				  int list_mode, unsigned long max_count)
-{
-	unsigned long count;
-	unsigned long last_accessed;
-	struct dm_buffer *b;
-
-	for (count = 0; count < max_count; count++) {
-		b = cache_evict(&c->cache, list_mode, select_for_evict, params);
-		if (!b)
-			break;
-
-		last_accessed = READ_ONCE(b->last_accessed);
-		if (time_after_eq(params->last_accessed, last_accessed))
-			params->last_accessed = last_accessed;
-
-		__make_buffer_clean(b);
-		__free_buffer_wake(b);
-
-		cond_resched();
-	}
-
-	return count;
-}
-
-static void evict_old_buffers(struct dm_bufio_client *c, unsigned long age_hz)
-{
-	struct evict_params params = {.gfp = 0, .age_hz = age_hz, .last_accessed = 0};
-	unsigned long retain = get_retain_buffers(c);
-	unsigned long count;
-	LIST_HEAD(write_list);
-
-	dm_bufio_lock(c);
-
-	__check_watermark(c, &write_list);
-	if (unlikely(!list_empty(&write_list))) {
-		dm_bufio_unlock(c);
-		__flush_write_list(&write_list);
-		dm_bufio_lock(c);
-	}
-
-	count = cache_total(&c->cache);
-	if (count > retain)
-		__evict_many(c, &params, LIST_CLEAN, count - retain);
-
-	dm_bufio_unlock(c);
-}
-
-static void cleanup_old_buffers(void)
-{
-	unsigned long max_age_hz = get_max_age_hz();
-	struct dm_bufio_client *c;
-
-	mutex_lock(&dm_bufio_clients_lock);
-
-	__cache_size_refresh();
-
-	list_for_each_entry(c, &dm_bufio_all_clients, client_list)
-		evict_old_buffers(c, max_age_hz);
-
-	mutex_unlock(&dm_bufio_clients_lock);
-}
-
-static void work_fn(struct work_struct *w)
-{
-	cleanup_old_buffers();
-
-	queue_delayed_work(dm_bufio_wq, &dm_bufio_cleanup_old_work,
-			   DM_BUFIO_WORK_TIMER_SECS * HZ);
-}
-
-/*--------------------------------------------------------------*/
-
 /*
  * Global cleanup tries to evict the oldest buffers from across _all_
  * the clients. It does this by repeatedly evicting a few buffers from
@@ -2751,27 +2704,55 @@ static void __insert_client(struct dm_bufio_client *new_client)
 	list_add_tail(&new_client->client_list, h);
 }
 
+static enum evict_result select_for_evict(struct dm_buffer *b, void *context)
+{
+	/* In no-sleep mode, we cannot wait on IO. */
+	if (static_branch_unlikely(&no_sleep_enabled) && b->c->no_sleep) {
+		if (test_bit_acquire(B_READING, &b->state) ||
+		    test_bit(B_WRITING, &b->state) ||
+		    test_bit(B_DIRTY, &b->state))
+			return ER_DONT_EVICT;
+	}
+	return ER_EVICT;
+}
+
 static unsigned long __evict_a_few(unsigned long nr_buffers)
 {
-	unsigned long count;
 	struct dm_bufio_client *c;
-	struct evict_params params = {
-		.gfp = GFP_KERNEL,
-		.age_hz = 0,
-		/* set to jiffies in case there are no buffers in this client */
-		.last_accessed = jiffies
-	};
+	unsigned long oldest_buffer = jiffies;
+	unsigned long last_accessed;
+	unsigned long count;
+	struct dm_buffer *b;
 
 	c = __pop_client();
 	if (!c)
 		return 0;
 
 	dm_bufio_lock(c);
-	count = __evict_many(c, &params, LIST_CLEAN, nr_buffers);
+
+	for (count = 0; count < nr_buffers; count++) {
+		b = cache_evict(&c->cache, LIST_CLEAN, select_for_evict, NULL);
+		if (!b)
+			break;
+
+		last_accessed = READ_ONCE(b->last_accessed);
+		if (time_after_eq(oldest_buffer, last_accessed))
+			oldest_buffer = last_accessed;
+
+		__make_buffer_clean(b);
+		__free_buffer_wake(b);
+
+		if (need_resched()) {
+			dm_bufio_unlock(c);
+			cond_resched();
+			dm_bufio_lock(c);
+		}
+	}
+
 	dm_bufio_unlock(c);
 
 	if (count)
-		c->oldest_buffer = params.last_accessed;
+		c->oldest_buffer = oldest_buffer;
 	__insert_client(c);
 
 	return count;
@@ -2828,6 +2809,7 @@ static int __init dm_bufio_init(void)
 	__u64 mem;
 
 	dm_bufio_allocated_kmem_cache = 0;
+	dm_bufio_allocated_kmalloc = 0;
 	dm_bufio_allocated_get_free_pages = 0;
 	dm_bufio_allocated_vmalloc = 0;
 	dm_bufio_current_allocated = 0;
@@ -2853,10 +2835,7 @@ static int __init dm_bufio_init(void)
 	if (!dm_bufio_wq)
 		return -ENOMEM;
 
-	INIT_DELAYED_WORK(&dm_bufio_cleanup_old_work, work_fn);
 	INIT_WORK(&dm_bufio_replacement_work, do_global_cleanup);
-	queue_delayed_work(dm_bufio_wq, &dm_bufio_cleanup_old_work,
-			   DM_BUFIO_WORK_TIMER_SECS * HZ);
 
 	return 0;
 }
@@ -2868,7 +2847,6 @@ static void __exit dm_bufio_exit(void)
 {
 	int bug = 0;
 
-	cancel_delayed_work_sync(&dm_bufio_cleanup_old_work);
 	destroy_workqueue(dm_bufio_wq);
 
 	if (dm_bufio_client_count) {
@@ -2905,7 +2883,7 @@ module_param_named(max_cache_size_bytes, dm_bufio_cache_size, ulong, 0644);
 MODULE_PARM_DESC(max_cache_size_bytes, "Size of metadata cache");
 
 module_param_named(max_age_seconds, dm_bufio_max_age, uint, 0644);
-MODULE_PARM_DESC(max_age_seconds, "Max age of a buffer in seconds");
+MODULE_PARM_DESC(max_age_seconds, "No longer does anything");
 
 module_param_named(retain_bytes, dm_bufio_retain_bytes, ulong, 0644);
 MODULE_PARM_DESC(retain_bytes, "Try to keep at least this many bytes cached in memory");
@@ -2916,6 +2894,9 @@ MODULE_PARM_DESC(peak_allocated_bytes, "Tracks the maximum allocated memory");
 module_param_named(allocated_kmem_cache_bytes, dm_bufio_allocated_kmem_cache, ulong, 0444);
 MODULE_PARM_DESC(allocated_kmem_cache_bytes, "Memory allocated with kmem_cache_alloc");
 
+module_param_named(allocated_kmalloc_bytes, dm_bufio_allocated_kmalloc, ulong, 0444);
+MODULE_PARM_DESC(allocated_kmalloc_bytes, "Memory allocated with kmalloc_alloc");
+
 module_param_named(allocated_get_free_pages_bytes, dm_bufio_allocated_get_free_pages, ulong, 0444);
 MODULE_PARM_DESC(allocated_get_free_pages_bytes, "Memory allocated with get_free_pages");
 
@@ -2925,6 +2906,6 @@ MODULE_PARM_DESC(allocated_vmalloc_bytes, "Memory allocated with vmalloc");
 module_param_named(current_allocated_bytes, dm_bufio_current_allocated, ulong, 0444);
 MODULE_PARM_DESC(current_allocated_bytes, "Memory currently used by the cache");
 
-MODULE_AUTHOR("Mikulas Patocka <dm-devel@redhat.com>");
+MODULE_AUTHOR("Mikulas Patocka <dm-devel@lists.linux.dev>");
 MODULE_DESCRIPTION(DM_NAME " buffered I/O library");
 MODULE_LICENSE("GPL");
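
Note on the new API surface in this diff: dm_bufio_read_with_ioprio() and dm_bufio_prefetch_with_ioprio() take the same arguments as the plain variants plus an I/O priority. A minimal caller-side sketch follows; the function name, block number and the choice of four prefetched blocks at RT priority are hypothetical illustrations, not part of this patch.

/*
 * Usage sketch (hypothetical caller): read one metadata block and hint
 * the following blocks at RT I/O priority via an existing bufio client.
 */
#include <linux/dm-bufio.h>
#include <linux/err.h>
#include <linux/ioprio.h>

static int example_read_meta(struct dm_bufio_client *c, sector_t blk)
{
	struct dm_buffer *bp;
	void *data;
	unsigned short prio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_RT, 0);

	/* Prefetch is only a hint; read errors are reported by the read itself. */
	dm_bufio_prefetch_with_ioprio(c, blk, 4, prio);

	data = dm_bufio_read_with_ioprio(c, blk, &bp, prio);
	if (IS_ERR_OR_NULL(data))
		return data ? PTR_ERR(data) : -EIO;

	/* ... parse the block contents via 'data' ... */

	dm_bufio_release(bp);
	return 0;
}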
