summaryrefslogtreecommitdiff
path: root/drivers/md/dm-bufio.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/dm-bufio.c')
-rw-r--r--drivers/md/dm-bufio.c451
1 files changed, 216 insertions, 235 deletions
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index bc309e41d074..e6d28be11c5c 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -41,16 +41,6 @@
#define DM_BUFIO_LOW_WATERMARK_RATIO 16
/*
- * Check buffer ages in this interval (seconds)
- */
-#define DM_BUFIO_WORK_TIMER_SECS 30
-
-/*
- * Free buffers when they are older than this (seconds)
- */
-#define DM_BUFIO_DEFAULT_AGE_SECS 300
-
-/*
* The nr of bytes of cached data to keep around.
*/
#define DM_BUFIO_DEFAULT_RETAIN_BYTES (256 * 1024)
@@ -68,6 +58,8 @@
#define LIST_DIRTY 1
#define LIST_SIZE 2
+#define SCAN_RESCHED_CYCLE 16
+
/*--------------------------------------------------------------*/
/*
@@ -254,7 +246,7 @@ enum evict_result {
typedef enum evict_result (*le_predicate)(struct lru_entry *le, void *context);
-static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *context)
+static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *context, bool no_sleep)
{
unsigned long tested = 0;
struct list_head *h = lru->cursor;
@@ -295,7 +287,8 @@ static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *con
h = h->next;
- cond_resched();
+ if (!no_sleep)
+ cond_resched();
}
return NULL;
@@ -317,9 +310,10 @@ static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *con
*/
enum data_mode {
DATA_MODE_SLAB = 0,
- DATA_MODE_GET_FREE_PAGES = 1,
- DATA_MODE_VMALLOC = 2,
- DATA_MODE_LIMIT = 3
+ DATA_MODE_KMALLOC = 1,
+ DATA_MODE_GET_FREE_PAGES = 2,
+ DATA_MODE_VMALLOC = 3,
+ DATA_MODE_LIMIT = 4
};
struct dm_buffer {
@@ -382,7 +376,10 @@ struct dm_buffer {
*/
struct buffer_tree {
- struct rw_semaphore lock;
+ union {
+ struct rw_semaphore lock;
+ rwlock_t spinlock;
+ } u;
struct rb_root root;
} ____cacheline_aligned_in_smp;
@@ -393,9 +390,12 @@ struct dm_buffer_cache {
* on the locks.
*/
unsigned int num_locks;
+ bool no_sleep;
struct buffer_tree trees[];
};
+static DEFINE_STATIC_KEY_FALSE(no_sleep_enabled);
+
static inline unsigned int cache_index(sector_t block, unsigned int num_locks)
{
return dm_hash_locks_index(block, num_locks);
@@ -403,22 +403,34 @@ static inline unsigned int cache_index(sector_t block, unsigned int num_locks)
static inline void cache_read_lock(struct dm_buffer_cache *bc, sector_t block)
{
- down_read(&bc->trees[cache_index(block, bc->num_locks)].lock);
+ if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
+ read_lock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
+ else
+ down_read(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
}
static inline void cache_read_unlock(struct dm_buffer_cache *bc, sector_t block)
{
- up_read(&bc->trees[cache_index(block, bc->num_locks)].lock);
+ if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
+ read_unlock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
+ else
+ up_read(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
}
static inline void cache_write_lock(struct dm_buffer_cache *bc, sector_t block)
{
- down_write(&bc->trees[cache_index(block, bc->num_locks)].lock);
+ if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
+ write_lock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
+ else
+ down_write(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
}
static inline void cache_write_unlock(struct dm_buffer_cache *bc, sector_t block)
{
- up_write(&bc->trees[cache_index(block, bc->num_locks)].lock);
+ if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
+ write_unlock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
+ else
+ up_write(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
}
/*
@@ -442,18 +454,32 @@ static void lh_init(struct lock_history *lh, struct dm_buffer_cache *cache, bool
static void __lh_lock(struct lock_history *lh, unsigned int index)
{
- if (lh->write)
- down_write(&lh->cache->trees[index].lock);
- else
- down_read(&lh->cache->trees[index].lock);
+ if (lh->write) {
+ if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
+ write_lock_bh(&lh->cache->trees[index].u.spinlock);
+ else
+ down_write(&lh->cache->trees[index].u.lock);
+ } else {
+ if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
+ read_lock_bh(&lh->cache->trees[index].u.spinlock);
+ else
+ down_read(&lh->cache->trees[index].u.lock);
+ }
}
static void __lh_unlock(struct lock_history *lh, unsigned int index)
{
- if (lh->write)
- up_write(&lh->cache->trees[index].lock);
- else
- up_read(&lh->cache->trees[index].lock);
+ if (lh->write) {
+ if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
+ write_unlock_bh(&lh->cache->trees[index].u.spinlock);
+ else
+ up_write(&lh->cache->trees[index].u.lock);
+ } else {
+ if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
+ read_unlock_bh(&lh->cache->trees[index].u.spinlock);
+ else
+ up_read(&lh->cache->trees[index].u.lock);
+ }
}
/*
@@ -496,20 +522,21 @@ static struct dm_buffer *list_to_buffer(struct list_head *l)
{
struct lru_entry *le = list_entry(l, struct lru_entry, list);
- if (!le)
- return NULL;
-
return le_to_buffer(le);
}
-static void cache_init(struct dm_buffer_cache *bc, unsigned int num_locks)
+static void cache_init(struct dm_buffer_cache *bc, unsigned int num_locks, bool no_sleep)
{
unsigned int i;
bc->num_locks = num_locks;
+ bc->no_sleep = no_sleep;
for (i = 0; i < bc->num_locks; i++) {
- init_rwsem(&bc->trees[i].lock);
+ if (no_sleep)
+ rwlock_init(&bc->trees[i].u.spinlock);
+ else
+ init_rwsem(&bc->trees[i].u.lock);
bc->trees[i].root = RB_ROOT;
}
@@ -648,7 +675,7 @@ static struct dm_buffer *__cache_evict(struct dm_buffer_cache *bc, int list_mode
struct lru_entry *le;
struct dm_buffer *b;
- le = lru_evict(&bc->lru[list_mode], __evict_pred, &w);
+ le = lru_evict(&bc->lru[list_mode], __evict_pred, &w, bc->no_sleep);
if (!le)
return NULL;
@@ -702,7 +729,7 @@ static void __cache_mark_many(struct dm_buffer_cache *bc, int old_mode, int new_
struct evict_wrapper w = {.lh = lh, .pred = pred, .context = context};
while (true) {
- le = lru_evict(&bc->lru[old_mode], __evict_pred, &w);
+ le = lru_evict(&bc->lru[old_mode], __evict_pred, &w, bc->no_sleep);
if (!le)
break;
@@ -915,10 +942,11 @@ static void cache_remove_range(struct dm_buffer_cache *bc,
{
unsigned int i;
+ BUG_ON(bc->no_sleep);
for (i = 0; i < bc->num_locks; i++) {
- down_write(&bc->trees[i].lock);
+ down_write(&bc->trees[i].u.lock);
__remove_range(bc, &bc->trees[i].root, begin, end, pred, release);
- up_write(&bc->trees[i].lock);
+ up_write(&bc->trees[i].u.lock);
}
}
@@ -963,7 +991,7 @@ struct dm_bufio_client {
sector_t start;
- struct shrinker shrinker;
+ struct shrinker *shrinker;
struct work_struct shrink_work;
atomic_long_t need_shrink;
@@ -979,8 +1007,6 @@ struct dm_bufio_client {
struct dm_buffer_cache cache; /* must be last member */
};
-static DEFINE_STATIC_KEY_FALSE(no_sleep_enabled);
-
/*----------------------------------------------------------------*/
#define dm_bufio_in_request() (!!current->bio_list)
@@ -1021,14 +1047,13 @@ static unsigned long dm_bufio_cache_size_latch;
static DEFINE_SPINLOCK(global_spinlock);
-/*
- * Buffers are freed after this timeout
- */
-static unsigned int dm_bufio_max_age = DM_BUFIO_DEFAULT_AGE_SECS;
+static unsigned int dm_bufio_max_age; /* No longer does anything */
+
static unsigned long dm_bufio_retain_bytes = DM_BUFIO_DEFAULT_RETAIN_BYTES;
static unsigned long dm_bufio_peak_allocated;
static unsigned long dm_bufio_allocated_kmem_cache;
+static unsigned long dm_bufio_allocated_kmalloc;
static unsigned long dm_bufio_allocated_get_free_pages;
static unsigned long dm_bufio_allocated_vmalloc;
static unsigned long dm_bufio_current_allocated;
@@ -1051,7 +1076,6 @@ static LIST_HEAD(dm_bufio_all_clients);
static DEFINE_MUTEX(dm_bufio_clients_lock);
static struct workqueue_struct *dm_bufio_wq;
-static struct delayed_work dm_bufio_cleanup_old_work;
static struct work_struct dm_bufio_replacement_work;
@@ -1071,6 +1095,7 @@ static void adjust_total_allocated(struct dm_buffer *b, bool unlink)
static unsigned long * const class_ptr[DATA_MODE_LIMIT] = {
&dm_bufio_allocated_kmem_cache,
+ &dm_bufio_allocated_kmalloc,
&dm_bufio_allocated_get_free_pages,
&dm_bufio_allocated_vmalloc,
};
@@ -1134,7 +1159,7 @@ static void __cache_size_refresh(void)
* If the allocation may fail we use __get_free_pages. Memory fragmentation
* won't have a fatal effect here, but it just causes flushes of some other
* buffers and more I/O will be performed. Don't use __get_free_pages if it
- * always fails (i.e. order > MAX_ORDER).
+ * always fails (i.e. order > MAX_PAGE_ORDER).
*
* If the allocation shouldn't fail we use __vmalloc. This is only for the
* initial reserve allocation, so there's no risk of wasting all vmalloc
@@ -1148,6 +1173,11 @@ static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask,
return kmem_cache_alloc(c->slab_cache, gfp_mask);
}
+ if (unlikely(c->block_size < PAGE_SIZE)) {
+ *data_mode = DATA_MODE_KMALLOC;
+ return kmalloc(c->block_size, gfp_mask | __GFP_RECLAIMABLE);
+ }
+
if (c->block_size <= KMALLOC_MAX_SIZE &&
gfp_mask & __GFP_NORETRY) {
*data_mode = DATA_MODE_GET_FREE_PAGES;
@@ -1171,6 +1201,10 @@ static void free_buffer_data(struct dm_bufio_client *c,
kmem_cache_free(c->slab_cache, data);
break;
+ case DATA_MODE_KMALLOC:
+ kfree(data);
+ break;
+
case DATA_MODE_GET_FREE_PAGES:
free_pages((unsigned long)data,
c->sectors_per_block_bits - (PAGE_SHIFT - SECTOR_SHIFT));
@@ -1256,7 +1290,8 @@ static void dmio_complete(unsigned long error, void *context)
}
static void use_dmio(struct dm_buffer *b, enum req_op op, sector_t sector,
- unsigned int n_sectors, unsigned int offset)
+ unsigned int n_sectors, unsigned int offset,
+ unsigned short ioprio)
{
int r;
struct dm_io_request io_req = {
@@ -1279,7 +1314,7 @@ static void use_dmio(struct dm_buffer *b, enum req_op op, sector_t sector,
io_req.mem.ptr.vma = (char *)b->data + offset;
}
- r = dm_io(&io_req, 1, &region, NULL);
+ r = dm_io(&io_req, 1, &region, NULL, ioprio);
if (unlikely(r))
b->end_io(b, errno_to_blk_status(r));
}
@@ -1295,26 +1330,28 @@ static void bio_complete(struct bio *bio)
}
static void use_bio(struct dm_buffer *b, enum req_op op, sector_t sector,
- unsigned int n_sectors, unsigned int offset)
+ unsigned int n_sectors, unsigned int offset,
+ unsigned short ioprio)
{
struct bio *bio;
char *ptr;
unsigned int len;
- bio = bio_kmalloc(1, GFP_NOWAIT | __GFP_NORETRY | __GFP_NOWARN);
+ bio = bio_kmalloc(1, GFP_NOWAIT);
if (!bio) {
- use_dmio(b, op, sector, n_sectors, offset);
+ use_dmio(b, op, sector, n_sectors, offset, ioprio);
return;
}
- bio_init(bio, b->c->bdev, bio->bi_inline_vecs, 1, op);
+ bio_init_inline(bio, b->c->bdev, 1, op);
bio->bi_iter.bi_sector = sector;
bio->bi_end_io = bio_complete;
bio->bi_private = b;
+ bio->bi_ioprio = ioprio;
ptr = (char *)b->data + offset;
len = n_sectors << SECTOR_SHIFT;
- __bio_add_page(bio, virt_to_page(ptr), len, offset_in_page(ptr));
+ bio_add_virt_nofail(bio, ptr, len);
submit_bio(bio);
}
@@ -1332,7 +1369,7 @@ static inline sector_t block_to_sector(struct dm_bufio_client *c, sector_t block
return sector;
}
-static void submit_io(struct dm_buffer *b, enum req_op op,
+static void submit_io(struct dm_buffer *b, enum req_op op, unsigned short ioprio,
void (*end_io)(struct dm_buffer *, blk_status_t))
{
unsigned int n_sectors;
@@ -1362,9 +1399,9 @@ static void submit_io(struct dm_buffer *b, enum req_op op,
}
if (b->data_mode != DATA_MODE_VMALLOC)
- use_bio(b, op, sector, n_sectors, offset);
+ use_bio(b, op, sector, n_sectors, offset, ioprio);
else
- use_dmio(b, op, sector, n_sectors, offset);
+ use_dmio(b, op, sector, n_sectors, offset, ioprio);
}
/*
@@ -1420,7 +1457,7 @@ static void __write_dirty_buffer(struct dm_buffer *b,
b->write_end = b->dirty_end;
if (!write_list)
- submit_io(b, REQ_OP_WRITE, write_endio);
+ submit_io(b, REQ_OP_WRITE, IOPRIO_DEFAULT, write_endio);
else
list_add_tail(&b->write_list, write_list);
}
@@ -1434,7 +1471,7 @@ static void __flush_write_list(struct list_head *write_list)
struct dm_buffer *b =
list_entry(write_list->next, struct dm_buffer, write_list);
list_del(&b->write_list);
- submit_io(b, REQ_OP_WRITE, write_endio);
+ submit_io(b, REQ_OP_WRITE, IOPRIO_DEFAULT, write_endio);
cond_resched();
}
blk_finish_plug(&plug);
@@ -1564,18 +1601,18 @@ static struct dm_buffer *__alloc_buffer_wait_no_callback(struct dm_bufio_client
* dm-bufio is resistant to allocation failures (it just keeps
* one buffer reserved in cases all the allocations fail).
* So set flags to not try too hard:
- * GFP_NOWAIT: don't wait; if we need to sleep we'll release our
- * mutex and wait ourselves.
+ * GFP_NOWAIT: don't wait and don't print a warning in case of
+ * failure; if we need to sleep we'll release our mutex
+ * and wait ourselves.
* __GFP_NORETRY: don't retry and rather return failure
* __GFP_NOMEMALLOC: don't use emergency reserves
- * __GFP_NOWARN: don't print a warning in case of failure
*
* For debugging, if we set the cache size to 1, no new buffers will
* be allocated.
*/
while (1) {
if (dm_bufio_cache_size_latch != 1) {
- b = alloc_buffer(c, GFP_NOWAIT | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
+ b = alloc_buffer(c, GFP_NOWAIT | __GFP_NORETRY | __GFP_NOMEMALLOC);
if (b)
return b;
}
@@ -1816,7 +1853,8 @@ static void read_endio(struct dm_buffer *b, blk_status_t status)
* and uses dm_bufio_mark_buffer_dirty to write new data back).
*/
static void *new_read(struct dm_bufio_client *c, sector_t block,
- enum new_flag nf, struct dm_buffer **bp)
+ enum new_flag nf, struct dm_buffer **bp,
+ unsigned short ioprio)
{
int need_submit = 0;
struct dm_buffer *b;
@@ -1869,9 +1907,10 @@ static void *new_read(struct dm_bufio_client *c, sector_t block,
return NULL;
if (need_submit)
- submit_io(b, REQ_OP_READ, read_endio);
+ submit_io(b, REQ_OP_READ, ioprio, read_endio);
- wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
+ if (nf != NF_GET) /* we already tested this condition above */
+ wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
if (b->read_error) {
int error = blk_status_to_errno(b->read_error);
@@ -1889,32 +1928,46 @@ static void *new_read(struct dm_bufio_client *c, sector_t block,
void *dm_bufio_get(struct dm_bufio_client *c, sector_t block,
struct dm_buffer **bp)
{
- return new_read(c, block, NF_GET, bp);
+ return new_read(c, block, NF_GET, bp, IOPRIO_DEFAULT);
}
EXPORT_SYMBOL_GPL(dm_bufio_get);
-void *dm_bufio_read(struct dm_bufio_client *c, sector_t block,
- struct dm_buffer **bp)
+static void *__dm_bufio_read(struct dm_bufio_client *c, sector_t block,
+ struct dm_buffer **bp, unsigned short ioprio)
{
if (WARN_ON_ONCE(dm_bufio_in_request()))
return ERR_PTR(-EINVAL);
- return new_read(c, block, NF_READ, bp);
+ return new_read(c, block, NF_READ, bp, ioprio);
+}
+
+void *dm_bufio_read(struct dm_bufio_client *c, sector_t block,
+ struct dm_buffer **bp)
+{
+ return __dm_bufio_read(c, block, bp, IOPRIO_DEFAULT);
}
EXPORT_SYMBOL_GPL(dm_bufio_read);
+void *dm_bufio_read_with_ioprio(struct dm_bufio_client *c, sector_t block,
+ struct dm_buffer **bp, unsigned short ioprio)
+{
+ return __dm_bufio_read(c, block, bp, ioprio);
+}
+EXPORT_SYMBOL_GPL(dm_bufio_read_with_ioprio);
+
void *dm_bufio_new(struct dm_bufio_client *c, sector_t block,
struct dm_buffer **bp)
{
if (WARN_ON_ONCE(dm_bufio_in_request()))
return ERR_PTR(-EINVAL);
- return new_read(c, block, NF_FRESH, bp);
+ return new_read(c, block, NF_FRESH, bp, IOPRIO_DEFAULT);
}
EXPORT_SYMBOL_GPL(dm_bufio_new);
-void dm_bufio_prefetch(struct dm_bufio_client *c,
- sector_t block, unsigned int n_blocks)
+static void __dm_bufio_prefetch(struct dm_bufio_client *c,
+ sector_t block, unsigned int n_blocks,
+ unsigned short ioprio)
{
struct blk_plug plug;
@@ -1950,7 +2003,7 @@ void dm_bufio_prefetch(struct dm_bufio_client *c,
dm_bufio_unlock(c);
if (need_submit)
- submit_io(b, REQ_OP_READ, read_endio);
+ submit_io(b, REQ_OP_READ, ioprio, read_endio);
dm_bufio_release(b);
cond_resched();
@@ -1965,8 +2018,20 @@ void dm_bufio_prefetch(struct dm_bufio_client *c,
flush_plug:
blk_finish_plug(&plug);
}
+
+void dm_bufio_prefetch(struct dm_bufio_client *c, sector_t block, unsigned int n_blocks)
+{
+ return __dm_bufio_prefetch(c, block, n_blocks, IOPRIO_DEFAULT);
+}
EXPORT_SYMBOL_GPL(dm_bufio_prefetch);
+void dm_bufio_prefetch_with_ioprio(struct dm_bufio_client *c, sector_t block,
+ unsigned int n_blocks, unsigned short ioprio)
+{
+ return __dm_bufio_prefetch(c, block, n_blocks, ioprio);
+}
+EXPORT_SYMBOL_GPL(dm_bufio_prefetch_with_ioprio);
+
void dm_bufio_release(struct dm_buffer *b)
{
struct dm_bufio_client *c = b->c;
@@ -2130,7 +2195,7 @@ int dm_bufio_issue_flush(struct dm_bufio_client *c)
if (WARN_ON_ONCE(dm_bufio_in_request()))
return -EINVAL;
- return dm_io(&io_req, 1, &io_reg, NULL);
+ return dm_io(&io_req, 1, &io_reg, NULL, IOPRIO_DEFAULT);
}
EXPORT_SYMBOL_GPL(dm_bufio_issue_flush);
@@ -2154,11 +2219,11 @@ int dm_bufio_issue_discard(struct dm_bufio_client *c, sector_t block, sector_t c
if (WARN_ON_ONCE(dm_bufio_in_request()))
return -EINVAL; /* discards are optional */
- return dm_io(&io_req, 1, &io_reg, NULL);
+ return dm_io(&io_req, 1, &io_reg, NULL, IOPRIO_DEFAULT);
}
EXPORT_SYMBOL_GPL(dm_bufio_issue_discard);
-static bool forget_buffer(struct dm_bufio_client *c, sector_t block)
+static void forget_buffer(struct dm_bufio_client *c, sector_t block)
{
struct dm_buffer *b;
@@ -2173,8 +2238,6 @@ static bool forget_buffer(struct dm_bufio_client *c, sector_t block)
cache_put_and_wake(c, b);
}
}
-
- return b ? true : false;
}
/*
@@ -2350,7 +2413,12 @@ static void __scan(struct dm_bufio_client *c)
atomic_long_dec(&c->need_shrink);
freed++;
- cond_resched();
+
+ if (unlikely(freed % SCAN_RESCHED_CYCLE == 0)) {
+ dm_bufio_unlock(c);
+ cond_resched();
+ dm_bufio_lock(c);
+ }
}
}
}
@@ -2368,7 +2436,7 @@ static unsigned long dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink
{
struct dm_bufio_client *c;
- c = container_of(shrink, struct dm_bufio_client, shrinker);
+ c = shrink->private_data;
atomic_long_add(sc->nr_to_scan, &c->need_shrink);
queue_work(dm_bufio_wq, &c->shrink_work);
@@ -2377,7 +2445,7 @@ static unsigned long dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink
static unsigned long dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
{
- struct dm_bufio_client *c = container_of(shrink, struct dm_bufio_client, shrinker);
+ struct dm_bufio_client *c = shrink->private_data;
unsigned long count = cache_total(&c->cache);
unsigned long retain_target = get_retain_buffers(c);
unsigned long queued_for_cleanup = atomic_long_read(&c->need_shrink);
@@ -2407,7 +2475,8 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
int r;
unsigned int num_locks;
struct dm_bufio_client *c;
- char slab_name[27];
+ char slab_name[64];
+ static atomic_t seqno = ATOMIC_INIT(0);
if (!block_size || block_size & ((1 << SECTOR_SHIFT) - 1)) {
DMERR("%s: block size not specified or is not multiple of 512b", __func__);
@@ -2421,7 +2490,7 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
r = -ENOMEM;
goto bad_client;
}
- cache_init(&c->cache, num_locks);
+ cache_init(&c->cache, num_locks, (flags & DM_BUFIO_CLIENT_NO_SLEEP) != 0);
c->bdev = bdev;
c->block_size = block_size;
@@ -2454,11 +2523,11 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
goto bad_dm_io;
}
- if (block_size <= KMALLOC_MAX_SIZE &&
- (block_size < PAGE_SIZE || !is_power_of_2(block_size))) {
+ if (block_size <= KMALLOC_MAX_SIZE && !is_power_of_2(block_size)) {
unsigned int align = min(1U << __ffs(block_size), (unsigned int)PAGE_SIZE);
- snprintf(slab_name, sizeof(slab_name), "dm_bufio_cache-%u", block_size);
+ snprintf(slab_name, sizeof(slab_name), "dm_bufio_cache-%u-%u",
+ block_size, atomic_inc_return(&seqno));
c->slab_cache = kmem_cache_create(slab_name, block_size, align,
SLAB_RECLAIM_ACCOUNT, NULL);
if (!c->slab_cache) {
@@ -2467,9 +2536,11 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
}
}
if (aux_size)
- snprintf(slab_name, sizeof(slab_name), "dm_bufio_buffer-%u", aux_size);
+ snprintf(slab_name, sizeof(slab_name), "dm_bufio_buffer-%u-%u",
+ aux_size, atomic_inc_return(&seqno));
else
- snprintf(slab_name, sizeof(slab_name), "dm_bufio_buffer");
+ snprintf(slab_name, sizeof(slab_name), "dm_bufio_buffer-%u",
+ atomic_inc_return(&seqno));
c->slab_buffer = kmem_cache_create(slab_name, sizeof(struct dm_buffer) + aux_size,
0, SLAB_RECLAIM_ACCOUNT, NULL);
if (!c->slab_buffer) {
@@ -2490,14 +2561,20 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
INIT_WORK(&c->shrink_work, shrink_work);
atomic_long_set(&c->need_shrink, 0);
- c->shrinker.count_objects = dm_bufio_shrink_count;
- c->shrinker.scan_objects = dm_bufio_shrink_scan;
- c->shrinker.seeks = 1;
- c->shrinker.batch = 0;
- r = register_shrinker(&c->shrinker, "dm-bufio:(%u:%u)",
- MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev));
- if (r)
+ c->shrinker = shrinker_alloc(0, "dm-bufio:(%u:%u)",
+ MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev));
+ if (!c->shrinker) {
+ r = -ENOMEM;
goto bad;
+ }
+
+ c->shrinker->count_objects = dm_bufio_shrink_count;
+ c->shrinker->scan_objects = dm_bufio_shrink_scan;
+ c->shrinker->seeks = 1;
+ c->shrinker->batch = 0;
+ c->shrinker->private_data = c;
+
+ shrinker_register(c->shrinker);
mutex_lock(&dm_bufio_clients_lock);
dm_bufio_client_count++;
@@ -2537,7 +2614,7 @@ void dm_bufio_client_destroy(struct dm_bufio_client *c)
drop_buffers(c);
- unregister_shrinker(&c->shrinker);
+ shrinker_free(c->shrinker);
flush_work(&c->shrink_work);
mutex_lock(&dm_bufio_clients_lock);
@@ -2590,130 +2667,6 @@ EXPORT_SYMBOL_GPL(dm_bufio_set_sector_offset);
/*--------------------------------------------------------------*/
-static unsigned int get_max_age_hz(void)
-{
- unsigned int max_age = READ_ONCE(dm_bufio_max_age);
-
- if (max_age > UINT_MAX / HZ)
- max_age = UINT_MAX / HZ;
-
- return max_age * HZ;
-}
-
-static bool older_than(struct dm_buffer *b, unsigned long age_hz)
-{
- return time_after_eq(jiffies, READ_ONCE(b->last_accessed) + age_hz);
-}
-
-struct evict_params {
- gfp_t gfp;
- unsigned long age_hz;
-
- /*
- * This gets updated with the largest last_accessed (ie. most
- * recently used) of the evicted buffers. It will not be reinitialised
- * by __evict_many(), so you can use it across multiple invocations.
- */
- unsigned long last_accessed;
-};
-
-/*
- * We may not be able to evict this buffer if IO pending or the client
- * is still using it.
- *
- * And if GFP_NOFS is used, we must not do any I/O because we hold
- * dm_bufio_clients_lock and we would risk deadlock if the I/O gets
- * rerouted to different bufio client.
- */
-static enum evict_result select_for_evict(struct dm_buffer *b, void *context)
-{
- struct evict_params *params = context;
-
- if (!(params->gfp & __GFP_FS) ||
- (static_branch_unlikely(&no_sleep_enabled) && b->c->no_sleep)) {
- if (test_bit_acquire(B_READING, &b->state) ||
- test_bit(B_WRITING, &b->state) ||
- test_bit(B_DIRTY, &b->state))
- return ER_DONT_EVICT;
- }
-
- return older_than(b, params->age_hz) ? ER_EVICT : ER_STOP;
-}
-
-static unsigned long __evict_many(struct dm_bufio_client *c,
- struct evict_params *params,
- int list_mode, unsigned long max_count)
-{
- unsigned long count;
- unsigned long last_accessed;
- struct dm_buffer *b;
-
- for (count = 0; count < max_count; count++) {
- b = cache_evict(&c->cache, list_mode, select_for_evict, params);
- if (!b)
- break;
-
- last_accessed = READ_ONCE(b->last_accessed);
- if (time_after_eq(params->last_accessed, last_accessed))
- params->last_accessed = last_accessed;
-
- __make_buffer_clean(b);
- __free_buffer_wake(b);
-
- cond_resched();
- }
-
- return count;
-}
-
-static void evict_old_buffers(struct dm_bufio_client *c, unsigned long age_hz)
-{
- struct evict_params params = {.gfp = 0, .age_hz = age_hz, .last_accessed = 0};
- unsigned long retain = get_retain_buffers(c);
- unsigned long count;
- LIST_HEAD(write_list);
-
- dm_bufio_lock(c);
-
- __check_watermark(c, &write_list);
- if (unlikely(!list_empty(&write_list))) {
- dm_bufio_unlock(c);
- __flush_write_list(&write_list);
- dm_bufio_lock(c);
- }
-
- count = cache_total(&c->cache);
- if (count > retain)
- __evict_many(c, &params, LIST_CLEAN, count - retain);
-
- dm_bufio_unlock(c);
-}
-
-static void cleanup_old_buffers(void)
-{
- unsigned long max_age_hz = get_max_age_hz();
- struct dm_bufio_client *c;
-
- mutex_lock(&dm_bufio_clients_lock);
-
- __cache_size_refresh();
-
- list_for_each_entry(c, &dm_bufio_all_clients, client_list)
- evict_old_buffers(c, max_age_hz);
-
- mutex_unlock(&dm_bufio_clients_lock);
-}
-
-static void work_fn(struct work_struct *w)
-{
- cleanup_old_buffers();
-
- queue_delayed_work(dm_bufio_wq, &dm_bufio_cleanup_old_work,
- DM_BUFIO_WORK_TIMER_SECS * HZ);
-}
-
-/*--------------------------------------------------------------*/
-
/*
* Global cleanup tries to evict the oldest buffers from across _all_
* the clients. It does this by repeatedly evicting a few buffers from
@@ -2751,27 +2704,55 @@ static void __insert_client(struct dm_bufio_client *new_client)
list_add_tail(&new_client->client_list, h);
}
+static enum evict_result select_for_evict(struct dm_buffer *b, void *context)
+{
+ /* In no-sleep mode, we cannot wait on IO. */
+ if (static_branch_unlikely(&no_sleep_enabled) && b->c->no_sleep) {
+ if (test_bit_acquire(B_READING, &b->state) ||
+ test_bit(B_WRITING, &b->state) ||
+ test_bit(B_DIRTY, &b->state))
+ return ER_DONT_EVICT;
+ }
+ return ER_EVICT;
+}
+
static unsigned long __evict_a_few(unsigned long nr_buffers)
{
- unsigned long count;
struct dm_bufio_client *c;
- struct evict_params params = {
- .gfp = GFP_KERNEL,
- .age_hz = 0,
- /* set to jiffies in case there are no buffers in this client */
- .last_accessed = jiffies
- };
+ unsigned long oldest_buffer = jiffies;
+ unsigned long last_accessed;
+ unsigned long count;
+ struct dm_buffer *b;
c = __pop_client();
if (!c)
return 0;
dm_bufio_lock(c);
- count = __evict_many(c, &params, LIST_CLEAN, nr_buffers);
+
+ for (count = 0; count < nr_buffers; count++) {
+ b = cache_evict(&c->cache, LIST_CLEAN, select_for_evict, NULL);
+ if (!b)
+ break;
+
+ last_accessed = READ_ONCE(b->last_accessed);
+ if (time_after_eq(oldest_buffer, last_accessed))
+ oldest_buffer = last_accessed;
+
+ __make_buffer_clean(b);
+ __free_buffer_wake(b);
+
+ if (need_resched()) {
+ dm_bufio_unlock(c);
+ cond_resched();
+ dm_bufio_lock(c);
+ }
+ }
+
dm_bufio_unlock(c);
if (count)
- c->oldest_buffer = params.last_accessed;
+ c->oldest_buffer = oldest_buffer;
__insert_client(c);
return count;
@@ -2828,6 +2809,7 @@ static int __init dm_bufio_init(void)
__u64 mem;
dm_bufio_allocated_kmem_cache = 0;
+ dm_bufio_allocated_kmalloc = 0;
dm_bufio_allocated_get_free_pages = 0;
dm_bufio_allocated_vmalloc = 0;
dm_bufio_current_allocated = 0;
@@ -2853,10 +2835,7 @@ static int __init dm_bufio_init(void)
if (!dm_bufio_wq)
return -ENOMEM;
- INIT_DELAYED_WORK(&dm_bufio_cleanup_old_work, work_fn);
INIT_WORK(&dm_bufio_replacement_work, do_global_cleanup);
- queue_delayed_work(dm_bufio_wq, &dm_bufio_cleanup_old_work,
- DM_BUFIO_WORK_TIMER_SECS * HZ);
return 0;
}
@@ -2868,7 +2847,6 @@ static void __exit dm_bufio_exit(void)
{
int bug = 0;
- cancel_delayed_work_sync(&dm_bufio_cleanup_old_work);
destroy_workqueue(dm_bufio_wq);
if (dm_bufio_client_count) {
@@ -2905,7 +2883,7 @@ module_param_named(max_cache_size_bytes, dm_bufio_cache_size, ulong, 0644);
MODULE_PARM_DESC(max_cache_size_bytes, "Size of metadata cache");
module_param_named(max_age_seconds, dm_bufio_max_age, uint, 0644);
-MODULE_PARM_DESC(max_age_seconds, "Max age of a buffer in seconds");
+MODULE_PARM_DESC(max_age_seconds, "No longer does anything");
module_param_named(retain_bytes, dm_bufio_retain_bytes, ulong, 0644);
MODULE_PARM_DESC(retain_bytes, "Try to keep at least this many bytes cached in memory");
@@ -2916,6 +2894,9 @@ MODULE_PARM_DESC(peak_allocated_bytes, "Tracks the maximum allocated memory");
module_param_named(allocated_kmem_cache_bytes, dm_bufio_allocated_kmem_cache, ulong, 0444);
MODULE_PARM_DESC(allocated_kmem_cache_bytes, "Memory allocated with kmem_cache_alloc");
+module_param_named(allocated_kmalloc_bytes, dm_bufio_allocated_kmalloc, ulong, 0444);
+MODULE_PARM_DESC(allocated_kmalloc_bytes, "Memory allocated with kmalloc_alloc");
+
module_param_named(allocated_get_free_pages_bytes, dm_bufio_allocated_get_free_pages, ulong, 0444);
MODULE_PARM_DESC(allocated_get_free_pages_bytes, "Memory allocated with get_free_pages");
@@ -2925,6 +2906,6 @@ MODULE_PARM_DESC(allocated_vmalloc_bytes, "Memory allocated with vmalloc");
module_param_named(current_allocated_bytes, dm_bufio_current_allocated, ulong, 0444);
MODULE_PARM_DESC(current_allocated_bytes, "Memory currently used by the cache");
-MODULE_AUTHOR("Mikulas Patocka <dm-devel@redhat.com>");
+MODULE_AUTHOR("Mikulas Patocka <dm-devel@lists.linux.dev>");
MODULE_DESCRIPTION(DM_NAME " buffered I/O library");
MODULE_LICENSE("GPL");