Diffstat (limited to 'drivers/md/bcache/super.c')
-rw-r--r--  drivers/md/bcache/super.c | 346
1 file changed, 187 insertions(+), 159 deletions(-)
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 0ae2b3676293..c17d4517af22 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -168,14 +168,14 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev,
{
const char *err;
struct cache_sb_disk *s;
- struct page *page;
+ struct folio *folio;
unsigned int i;
- page = read_cache_page_gfp(bdev->bd_inode->i_mapping,
- SB_OFFSET >> PAGE_SHIFT, GFP_KERNEL);
- if (IS_ERR(page))
+ folio = mapping_read_folio_gfp(bdev->bd_mapping,
+ SB_OFFSET >> PAGE_SHIFT, GFP_KERNEL);
+ if (IS_ERR(folio))
return "IO error";
- s = page_address(page) + offset_in_page(SB_OFFSET);
+ s = folio_address(folio) + offset_in_folio(folio, SB_OFFSET);
sb->offset = le64_to_cpu(s->offset);
sb->version = le64_to_cpu(s->version);
@@ -272,7 +272,7 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev,
*res = s;
return NULL;
err:
- put_page(page);
+ folio_put(folio);
return err;
}
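For context, the folio conversion above keeps the same overall shape as the old page-based read; a minimal consolidated sketch of the new access pattern (error labels and field parsing trimmed, superblock layout as in the hunk):

static const char *read_sb(struct block_device *bdev, struct cache_sb_disk **res)
{
	struct folio *folio;

	/* Look up (and read, if needed) the page-cache folio covering SB_OFFSET. */
	folio = mapping_read_folio_gfp(bdev->bd_mapping,
				       SB_OFFSET >> PAGE_SHIFT, GFP_KERNEL);
	if (IS_ERR(folio))
		return "IO error";

	/* The on-disk superblock sits at its byte offset within that folio. */
	*res = folio_address(folio) + offset_in_folio(folio, SB_OFFSET);
	return NULL;	/* error paths drop the reference with folio_put(folio) */
}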
@@ -293,8 +293,7 @@ static void __write_super(struct cache_sb *sb, struct cache_sb_disk *out,
bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_META;
bio->bi_iter.bi_sector = SB_SECTOR;
- __bio_add_page(bio, virt_to_page(out), SB_SIZE,
- offset_in_page(out));
+ bio_add_virt_nofail(bio, out, SB_SIZE);
out->offset = cpu_to_le64(sb->offset);
@@ -327,9 +326,9 @@ static void __write_super(struct cache_sb *sb, struct cache_sb_disk *out,
submit_bio(bio);
}
-static void bch_write_bdev_super_unlock(struct closure *cl)
+static CLOSURE_CALLBACK(bch_write_bdev_super_unlock)
{
- struct cached_dev *dc = container_of(cl, struct cached_dev, sb_write);
+ closure_type(dc, struct cached_dev, sb_write);
up(&dc->sb_write_mutex);
}
@@ -363,9 +362,9 @@ static void write_super_endio(struct bio *bio)
closure_put(&ca->set->sb_write);
}
-static void bcache_write_super_unlock(struct closure *cl)
+static CLOSURE_CALLBACK(bcache_write_super_unlock)
{
- struct cache_set *c = container_of(cl, struct cache_set, sb_write);
+ closure_type(c, struct cache_set, sb_write);
up(&c->sb_write_mutex);
}
@@ -407,9 +406,9 @@ static void uuid_endio(struct bio *bio)
closure_put(cl);
}
-static void uuid_io_unlock(struct closure *cl)
+static CLOSURE_CALLBACK(uuid_io_unlock)
{
- struct cache_set *c = container_of(cl, struct cache_set, uuid_write);
+ closure_type(c, struct cache_set, uuid_write);
up(&c->uuid_write_mutex);
}
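The callback conversions above (and the similar ones later in this diff) follow the closure API rework in which callbacks receive a work_struct and recover their container through helper macros. Roughly, and simplified from include/linux/closure.h:

/* Simplified view of the helpers used by the converted callbacks above. */
#define CLOSURE_CALLBACK(name)	void name(struct work_struct *ws)
#define closure_type(name, type, member)				\
	struct closure *cl = container_of(ws, struct closure, work);	\
	type *name = container_of(cl, type, member)

So closure_type(dc, struct cached_dev, sb_write) still ends up doing the old container_of() dance, just starting from the embedded work item.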
@@ -546,7 +545,8 @@ static struct uuid_entry *uuid_find(struct cache_set *c, const char *uuid)
static struct uuid_entry *uuid_find_empty(struct cache_set *c)
{
- static const char zero_uuid[16] = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
+ static const char zero_uuid[16] __nonstring =
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
return uuid_find(c, zero_uuid);
}
@@ -881,8 +881,8 @@ static void bcache_device_free(struct bcache_device *d)
bcache_device_detach(d);
if (disk) {
- ida_simple_remove(&bcache_device_idx,
- first_minor_to_idx(disk->first_minor));
+ ida_free(&bcache_device_idx,
+ first_minor_to_idx(disk->first_minor));
put_disk(disk);
}
@@ -897,14 +897,30 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
sector_t sectors, struct block_device *cached_bdev,
const struct block_device_operations *ops)
{
- struct request_queue *q;
const size_t max_stripes = min_t(size_t, INT_MAX,
SIZE_MAX / sizeof(atomic_t));
+ struct queue_limits lim = {
+ .max_hw_sectors = UINT_MAX,
+ .max_sectors = UINT_MAX,
+ .max_segment_size = UINT_MAX,
+ .max_segments = BIO_MAX_VECS,
+ .max_hw_discard_sectors = UINT_MAX,
+ .io_min = block_size,
+ .logical_block_size = block_size,
+ .physical_block_size = block_size,
+ .features = BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA,
+ };
uint64_t n;
int idx;
+ if (cached_bdev) {
+ d->stripe_size = bdev_io_opt(cached_bdev) >> SECTOR_SHIFT;
+ lim.io_opt = umax(block_size, bdev_io_opt(cached_bdev));
+ }
if (!d->stripe_size)
d->stripe_size = 1 << 31;
+ else if (d->stripe_size < BCH_MIN_STRIPE_SZ)
+ d->stripe_size = roundup(BCH_MIN_STRIPE_SZ, d->stripe_size);
n = DIV_ROUND_UP_ULL(sectors, d->stripe_size);
if (!n || n > max_stripes) {
@@ -924,8 +940,8 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
if (!d->full_dirty_stripes)
goto out_free_stripe_sectors_dirty;
- idx = ida_simple_get(&bcache_device_idx, 0,
- BCACHE_DEVICE_IDX_MAX, GFP_KERNEL);
+ idx = ida_alloc_max(&bcache_device_idx, BCACHE_DEVICE_IDX_MAX - 1,
+ GFP_KERNEL);
if (idx < 0)
goto out_free_full_dirty_stripes;
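A note on the IDA conversion above: ida_simple_get() took an exclusive end while ida_alloc_max() takes an inclusive maximum, which is why the new call passes BCACHE_DEVICE_IDX_MAX - 1. Both request an id in the same range:

/* Old and new forms both allocate an id in [0, BCACHE_DEVICE_IDX_MAX - 1]. */
idx = ida_simple_get(&bcache_device_idx, 0, BCACHE_DEVICE_IDX_MAX, GFP_KERNEL);	/* end exclusive */
idx = ida_alloc_max(&bcache_device_idx, BCACHE_DEVICE_IDX_MAX - 1, GFP_KERNEL);	/* max inclusive */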
@@ -933,53 +949,37 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER))
goto out_ida_remove;
- d->disk = blk_alloc_disk(NUMA_NO_NODE);
- if (!d->disk)
- goto out_bioset_exit;
-
- set_capacity(d->disk, sectors);
- snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", idx);
-
- d->disk->major = bcache_major;
- d->disk->first_minor = idx_to_first_minor(idx);
- d->disk->minors = BCACHE_MINORS;
- d->disk->fops = ops;
- d->disk->private_data = d;
-
- q = d->disk->queue;
- q->limits.max_hw_sectors = UINT_MAX;
- q->limits.max_sectors = UINT_MAX;
- q->limits.max_segment_size = UINT_MAX;
- q->limits.max_segments = BIO_MAX_VECS;
- blk_queue_max_discard_sectors(q, UINT_MAX);
- q->limits.discard_granularity = 512;
- q->limits.io_min = block_size;
- q->limits.logical_block_size = block_size;
- q->limits.physical_block_size = block_size;
-
- if (q->limits.logical_block_size > PAGE_SIZE && cached_bdev) {
+ if (lim.logical_block_size > PAGE_SIZE && cached_bdev) {
/*
* This should only happen with BCACHE_SB_VERSION_BDEV.
* Block/page size is checked for BCACHE_SB_VERSION_CDEV.
*/
- pr_info("%s: sb/logical block size (%u) greater than page size (%lu) falling back to device logical block size (%u)\n",
- d->disk->disk_name, q->limits.logical_block_size,
+ pr_info("bcache%i: sb/logical block size (%u) greater than page size (%lu) falling back to device logical block size (%u)\n",
+ idx, lim.logical_block_size,
PAGE_SIZE, bdev_logical_block_size(cached_bdev));
/* This also adjusts physical block size/min io size if needed */
- blk_queue_logical_block_size(q, bdev_logical_block_size(cached_bdev));
+ lim.logical_block_size = bdev_logical_block_size(cached_bdev);
}
- blk_queue_flag_set(QUEUE_FLAG_NONROT, d->disk->queue);
+ d->disk = blk_alloc_disk(&lim, NUMA_NO_NODE);
+ if (IS_ERR(d->disk))
+ goto out_bioset_exit;
- blk_queue_write_cache(q, true, true);
+ set_capacity(d->disk, sectors);
+ snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", idx);
+ d->disk->major = bcache_major;
+ d->disk->first_minor = idx_to_first_minor(idx);
+ d->disk->minors = BCACHE_MINORS;
+ d->disk->fops = ops;
+ d->disk->private_data = d;
return 0;
out_bioset_exit:
bioset_exit(&d->bio_split);
out_ida_remove:
- ida_simple_remove(&bcache_device_idx, idx);
+ ida_free(&bcache_device_idx, idx);
out_free_full_dirty_stripes:
kvfree(d->full_dirty_stripes);
out_free_stripe_sectors_dirty:
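The restructuring above adopts the limits-at-allocation model: the queue limits are assembled in a struct queue_limits up front and handed to blk_alloc_disk(), which now returns an ERR_PTR on failure rather than NULL, so the check switches from !d->disk to IS_ERR(d->disk). A minimal sketch of the pattern (values illustrative only):

struct queue_limits lim = {
	.logical_block_size	= 4096,		/* illustrative */
	.features		= BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA,
};
struct gendisk *disk;

disk = blk_alloc_disk(&lim, NUMA_NO_NODE);
if (IS_ERR(disk))
	return PTR_ERR(disk);	/* no longer NULL on failure */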
@@ -1342,9 +1342,9 @@ void bch_cached_dev_release(struct kobject *kobj)
module_put(THIS_MODULE);
}
-static void cached_dev_free(struct closure *cl)
+static CLOSURE_CALLBACK(cached_dev_free)
{
- struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
+ closure_type(dc, struct cached_dev, disk.cl);
if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
cancel_writeback_rate_update_dwork(dc);
@@ -1366,19 +1366,19 @@ static void cached_dev_free(struct closure *cl)
mutex_unlock(&bch_register_lock);
if (dc->sb_disk)
- put_page(virt_to_page(dc->sb_disk));
+ folio_put(virt_to_folio(dc->sb_disk));
- if (!IS_ERR_OR_NULL(dc->bdev))
- blkdev_put(dc->bdev, dc);
+ if (dc->bdev_file)
+ fput(dc->bdev_file);
wake_up(&unregister_wait);
kobject_put(&dc->disk.kobj);
}
-static void cached_dev_flush(struct closure *cl)
+static CLOSURE_CALLBACK(cached_dev_flush)
{
- struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
+ closure_type(dc, struct cached_dev, disk.cl);
struct bcache_device *d = &dc->disk;
mutex_lock(&bch_register_lock);
@@ -1388,7 +1388,7 @@ static void cached_dev_flush(struct closure *cl)
bch_cache_accounting_destroy(&dc->accounting);
kobject_del(&d->kobj);
- continue_at(cl, cached_dev_free, system_wq);
+ continue_at(cl, cached_dev_free, system_percpu_wq);
}
static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
@@ -1400,7 +1400,7 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
__module_get(THIS_MODULE);
INIT_LIST_HEAD(&dc->list);
closure_init(&dc->disk.cl, NULL);
- set_closure_fn(&dc->disk.cl, cached_dev_flush, system_wq);
+ set_closure_fn(&dc->disk.cl, cached_dev_flush, system_percpu_wq);
kobject_init(&dc->disk.kobj, &bch_cached_dev_ktype);
INIT_WORK(&dc->detach, cached_dev_detach_finish);
sema_init(&dc->sb_write_mutex, 1);
@@ -1415,11 +1415,9 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
hlist_add_head(&io->hash, dc->io_hash + RECENT_IO);
}
- dc->disk.stripe_size = q->limits.io_opt >> 9;
-
- if (dc->disk.stripe_size)
- dc->partial_stripes_expensive =
- q->limits.raid_partial_stripes_expensive;
+ if (bdev_io_opt(dc->bdev))
+ dc->partial_stripes_expensive = !!(q->limits.features &
+ BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE);
ret = bcache_device_init(&dc->disk, block_size,
bdev_nr_sectors(dc->bdev) - dc->sb.data_offset,
@@ -1427,9 +1425,6 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
if (ret)
return ret;
- blk_queue_io_opt(dc->disk.disk->queue,
- max(queue_io_opt(dc->disk.disk->queue), queue_io_opt(q)));
-
atomic_set(&dc->io_errors, 0);
dc->io_disable = false;
dc->error_limit = DEFAULT_CACHED_DEV_ERROR_LIMIT;
@@ -1444,7 +1439,7 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
/* Cached device - bcache superblock */
static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
- struct block_device *bdev,
+ struct file *bdev_file,
struct cached_dev *dc)
{
const char *err = "cannot allocate memory";
@@ -1452,14 +1447,15 @@ static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
int ret = -ENOMEM;
memcpy(&dc->sb, sb, sizeof(struct cache_sb));
- dc->bdev = bdev;
+ dc->bdev_file = bdev_file;
+ dc->bdev = file_bdev(bdev_file);
dc->sb_disk = sb_disk;
if (cached_dev_init(dc, sb->block_size << 9))
goto err;
err = "error creating kobject";
- if (kobject_add(&dc->disk.kobj, bdev_kobj(bdev), "bcache"))
+ if (kobject_add(&dc->disk.kobj, bdev_kobj(dc->bdev), "bcache"))
goto err;
if (bch_cache_accounting_add_kobjs(&dc->accounting, &dc->disk.kobj))
goto err;
@@ -1496,9 +1492,9 @@ void bch_flash_dev_release(struct kobject *kobj)
kfree(d);
}
-static void flash_dev_free(struct closure *cl)
+static CLOSURE_CALLBACK(flash_dev_free)
{
- struct bcache_device *d = container_of(cl, struct bcache_device, cl);
+ closure_type(d, struct bcache_device, cl);
mutex_lock(&bch_register_lock);
atomic_long_sub(bcache_dev_sectors_dirty(d),
@@ -1509,15 +1505,15 @@ static void flash_dev_free(struct closure *cl)
kobject_put(&d->kobj);
}
-static void flash_dev_flush(struct closure *cl)
+static CLOSURE_CALLBACK(flash_dev_flush)
{
- struct bcache_device *d = container_of(cl, struct bcache_device, cl);
+ closure_type(d, struct bcache_device, cl);
mutex_lock(&bch_register_lock);
bcache_device_unlink(d);
mutex_unlock(&bch_register_lock);
kobject_del(&d->kobj);
- continue_at(cl, flash_dev_free, system_wq);
+ continue_at(cl, flash_dev_free, system_percpu_wq);
}
static int flash_dev_run(struct cache_set *c, struct uuid_entry *u)
@@ -1529,7 +1525,7 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u)
goto err_ret;
closure_init(&d->cl, NULL);
- set_closure_fn(&d->cl, flash_dev_flush, system_wq);
+ set_closure_fn(&d->cl, flash_dev_flush, system_percpu_wq);
kobject_init(&d->kobj, &bch_flash_dev_ktype);
@@ -1667,9 +1663,9 @@ void bch_cache_set_release(struct kobject *kobj)
module_put(THIS_MODULE);
}
-static void cache_set_free(struct closure *cl)
+static CLOSURE_CALLBACK(cache_set_free)
{
- struct cache_set *c = container_of(cl, struct cache_set, cl);
+ closure_type(c, struct cache_set, cl);
struct cache *ca;
debugfs_remove(c->debug);
@@ -1708,9 +1704,9 @@ static void cache_set_free(struct closure *cl)
kobject_put(&c->kobj);
}
-static void cache_set_flush(struct closure *cl)
+static CLOSURE_CALLBACK(cache_set_flush)
{
- struct cache_set *c = container_of(cl, struct cache_set, caching);
+ closure_type(c, struct cache_set, caching);
struct cache *ca = c->cache;
struct btree *b;
@@ -1722,7 +1718,7 @@ static void cache_set_flush(struct closure *cl)
if (!IS_ERR_OR_NULL(c->gc_thread))
kthread_stop(c->gc_thread);
- if (!IS_ERR(c->root))
+ if (!IS_ERR_OR_NULL(c->root))
list_add(&c->root->list, &c->btree_cache);
/*
@@ -1737,7 +1733,12 @@ static void cache_set_flush(struct closure *cl)
mutex_unlock(&b->write_lock);
}
- if (ca->alloc_thread)
+ /*
+ * If the call to bch_cache_set_alloc() in register_cache_set() failed,
+ * ca has not been assigned a value and an error is returned, so we
+ * need to check that ca is not NULL during bch_cache_set_unregister().
+ */
+ if (ca && ca->alloc_thread)
kthread_stop(ca->alloc_thread);
if (c->journal.cur) {
@@ -1805,9 +1806,9 @@ static void conditional_stop_bcache_device(struct cache_set *c,
}
}
-static void __cache_set_unregister(struct closure *cl)
+static CLOSURE_CALLBACK(__cache_set_unregister)
{
- struct cache_set *c = container_of(cl, struct cache_set, caching);
+ closure_type(c, struct cache_set, caching);
struct cached_dev *dc;
struct bcache_device *d;
size_t i;
@@ -1832,7 +1833,7 @@ static void __cache_set_unregister(struct closure *cl)
mutex_unlock(&bch_register_lock);
- continue_at(cl, cache_set_flush, system_wq);
+ continue_at(cl, cache_set_flush, system_percpu_wq);
}
void bch_cache_set_stop(struct cache_set *c)
@@ -1862,10 +1863,10 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
__module_get(THIS_MODULE);
closure_init(&c->cl, NULL);
- set_closure_fn(&c->cl, cache_set_free, system_wq);
+ set_closure_fn(&c->cl, cache_set_free, system_percpu_wq);
closure_init(&c->caching, &c->cl);
- set_closure_fn(&c->caching, __cache_set_unregister, system_wq);
+ set_closure_fn(&c->caching, __cache_set_unregister, system_percpu_wq);
/* Maybe create continue_at_noreturn() and use it here? */
closure_set_stopped(&c->cl);
@@ -1911,8 +1912,9 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
INIT_LIST_HEAD(&c->btree_cache_freed);
INIT_LIST_HEAD(&c->data_buckets);
- iter_size = ((meta_bucket_pages(sb) * PAGE_SECTORS) / sb->block_size + 1) *
- sizeof(struct btree_iter_set);
+ iter_size = sizeof(struct btree_iter) +
+ ((meta_bucket_pages(sb) * PAGE_SECTORS) / sb->block_size) *
+ sizeof(struct btree_iter_set);
c->devices = kcalloc(c->nr_uuids, sizeof(void *), GFP_KERNEL);
if (!c->devices)
@@ -1937,7 +1939,8 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
if (!c->uuids)
goto err;
- c->moving_gc_wq = alloc_workqueue("bcache_gc", WQ_MEM_RECLAIM, 0);
+ c->moving_gc_wq = alloc_workqueue("bcache_gc",
+ WQ_MEM_RECLAIM | WQ_PERCPU, 0);
if (!c->moving_gc_wq)
goto err;
@@ -2015,7 +2018,7 @@ static int run_cache_set(struct cache_set *c)
c->root = bch_btree_node_get(c, NULL, k,
j->btree_level,
true, NULL);
- if (IS_ERR_OR_NULL(c->root))
+ if (IS_ERR(c->root))
goto err;
list_del_init(&c->root->list);
@@ -2214,10 +2217,10 @@ void bch_cache_release(struct kobject *kobj)
free_fifo(&ca->free[i]);
if (ca->sb_disk)
- put_page(virt_to_page(ca->sb_disk));
+ folio_put(virt_to_folio(ca->sb_disk));
- if (!IS_ERR_OR_NULL(ca->bdev))
- blkdev_put(ca->bdev, ca);
+ if (ca->bdev_file)
+ fput(ca->bdev_file);
kfree(ca);
module_put(THIS_MODULE);
@@ -2234,18 +2237,50 @@ static int cache_alloc(struct cache *ca)
__module_get(THIS_MODULE);
kobject_init(&ca->kobj, &bch_cache_ktype);
- bio_init(&ca->journal.bio, NULL, ca->journal.bio.bi_inline_vecs, 8, 0);
+ bio_init_inline(&ca->journal.bio, NULL, 8, 0);
/*
- * when ca->sb.njournal_buckets is not zero, journal exists,
- * and in bch_journal_replay(), tree node may split,
- * so bucket of RESERVE_BTREE type is needed,
- * the worst situation is all journal buckets are valid journal,
- * and all the keys need to replay,
- * so the number of RESERVE_BTREE type buckets should be as much
- * as journal buckets
+ * When the cache disk is first registered, ca->sb.njournal_buckets
+ * is zero, and it is assigned in run_cache_set().
+ *
+ * When ca->sb.njournal_buckets is not zero, a journal exists, and in
+ * bch_journal_replay() a tree node may split. The worst case is that
+ * all journal buckets hold valid journal entries and every key needs
+ * to be replayed, so there should be as many RESERVE_BTREE type
+ * buckets as journal buckets.
+ *
+ * If the number of RESERVE_BTREE type buckets is too small, the
+ * bch_allocator_thread() may hang and be unable to allocate a
+ * bucket. The situation is roughly as follows:
+ *
+ * 1. In bch_data_insert_keys(), if the operation is not op->replace,
+ * it will call bch_journal(), which increments the journal_ref
+ * counter. This counter is only decremented after bch_btree_insert
+ * completes.
+ *
+ * 2. When calling bch_btree_insert, if the btree needs to split,
+ * it will call btree_split() and btree_check_reserve() to check
+ * whether there are enough reserved buckets in the RESERVE_BTREE
+ * slot. If there are not enough, bcache_btree_root() retries repeatedly.
+ *
+ * 3. Normally, the bch_allocator_thread is responsible for filling
+ * the reservation slots from the free_inc bucket list. When the
+ * free_inc bucket list is exhausted, the bch_allocator_thread
+ * will call invalidate_buckets() until free_inc is refilled.
+ * Then bch_allocator_thread calls bch_prio_write() once, and
+ * bch_prio_write() calls bch_journal_meta() and waits for
+ * the journal write to complete.
+ *
+ * 4. During journal_write, journal_write_unlocked() is called.
+ * If the journal is full, journal_reclaim() and btree_flush_write()
+ * are called in sequence, and then journal_write is retried.
+ *
+ * 5. When 2 and 4 occur together, I/O hangs and cannot recover.
+ *
+ * Therefore, reserve more RESERVE_BTREE type buckets.
*/
- btree_buckets = ca->sb.njournal_buckets ?: 8;
+ btree_buckets = clamp_t(size_t, ca->sb.nbuckets >> 7,
+ 32, SB_JOURNAL_BUCKETS);
free = roundup_pow_of_two(ca->sb.nbuckets) >> 10;
if (!free) {
ret = -EPERM;
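For scale, and assuming SB_JOURNAL_BUCKETS is still 256 as in bcache's on-disk format header, the new reservation works out like this:

btree_buckets = clamp_t(size_t, ca->sb.nbuckets >> 7, 32, SB_JOURNAL_BUCKETS);
/* nbuckets =  2048: 2048 >> 7  =  16 -> clamped up to    32 */
/* nbuckets = 65536: 65536 >> 7 = 512 -> clamped down to 256 */

so the reserve scales with the device but is bounded on both ends, instead of simply mirroring njournal_buckets (or defaulting to 8).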
@@ -2337,38 +2372,39 @@ err_free:
}
static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
- struct block_device *bdev, struct cache *ca)
+ struct file *bdev_file,
+ struct cache *ca)
{
const char *err = NULL; /* must be set for any error case */
int ret = 0;
memcpy(&ca->sb, sb, sizeof(struct cache_sb));
- ca->bdev = bdev;
+ ca->bdev_file = bdev_file;
+ ca->bdev = file_bdev(bdev_file);
ca->sb_disk = sb_disk;
- if (bdev_max_discard_sectors((bdev)))
- ca->discard = CACHE_DISCARD(&ca->sb);
-
ret = cache_alloc(ca);
if (ret != 0) {
- /*
- * If we failed here, it means ca->kobj is not initialized yet,
- * kobject_put() won't be called and there is no chance to
- * call blkdev_put() to bdev in bch_cache_release(). So we
- * explicitly call blkdev_put() here.
- */
- blkdev_put(bdev, ca);
if (ret == -ENOMEM)
err = "cache_alloc(): -ENOMEM";
else if (ret == -EPERM)
err = "cache_alloc(): cache device is too small";
else
err = "cache_alloc(): unknown error";
- goto err;
+ pr_notice("error %pg: %s\n", file_bdev(bdev_file), err);
+ /*
+ * If we failed here, it means ca->kobj is not initialized yet,
+ * kobject_put() won't be called and there is no chance to
+ * call fput() on the bdev_file in bch_cache_release(). So
+ * we explicitly call fput() on the block device here.
+ */
+ fput(bdev_file);
+ return ret;
}
- if (kobject_add(&ca->kobj, bdev_kobj(bdev), "bcache")) {
- err = "error calling kobject_add";
+ if (kobject_add(&ca->kobj, bdev_kobj(file_bdev(bdev_file)), "bcache")) {
+ pr_notice("error %pg: error calling kobject_add\n",
+ file_bdev(bdev_file));
ret = -ENOMEM;
goto out;
}
@@ -2382,15 +2418,10 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
goto out;
}
- pr_info("registered cache device %pg\n", ca->bdev);
+ pr_info("registered cache device %pg\n", file_bdev(ca->bdev_file));
out:
kobject_put(&ca->kobj);
-
-err:
- if (err)
- pr_notice("error %pg: %s\n", ca->bdev, err);
-
return ret;
}
@@ -2445,7 +2476,7 @@ struct async_reg_args {
char *path;
struct cache_sb *sb;
struct cache_sb_disk *sb_disk;
- struct block_device *bdev;
+ struct file *bdev_file;
void *holder;
};
@@ -2456,8 +2487,8 @@ static void register_bdev_worker(struct work_struct *work)
container_of(work, struct async_reg_args, reg_work.work);
mutex_lock(&bch_register_lock);
- if (register_bdev(args->sb, args->sb_disk, args->bdev, args->holder)
- < 0)
+ if (register_bdev(args->sb, args->sb_disk, args->bdev_file,
+ args->holder) < 0)
fail = true;
mutex_unlock(&bch_register_lock);
@@ -2477,7 +2508,8 @@ static void register_cache_worker(struct work_struct *work)
container_of(work, struct async_reg_args, reg_work.work);
/* blkdev_put() will be called in bch_cache_release() */
- if (register_cache(args->sb, args->sb_disk, args->bdev, args->holder))
+ if (register_cache(args->sb, args->sb_disk, args->bdev_file,
+ args->holder))
fail = true;
if (fail)
@@ -2497,7 +2529,7 @@ static void register_device_async(struct async_reg_args *args)
INIT_DELAYED_WORK(&args->reg_work, register_cache_worker);
/* 10 jiffies is enough for a delay */
- queue_delayed_work(system_wq, &args->reg_work, 10);
+ queue_delayed_work(system_percpu_wq, &args->reg_work, 10);
}
static void *alloc_holder_object(struct cache_sb *sb)
@@ -2514,7 +2546,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
char *path = NULL;
struct cache_sb *sb;
struct cache_sb_disk *sb_disk;
- struct block_device *bdev, *bdev2;
+ struct file *bdev_file, *bdev_file2;
void *holder = NULL;
ssize_t ret;
bool async_registration = false;
@@ -2547,15 +2579,11 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
ret = -EINVAL;
err = "failed to open device";
- bdev = blkdev_get_by_path(strim(path), BLK_OPEN_READ, NULL, NULL);
- if (IS_ERR(bdev))
+ bdev_file = bdev_file_open_by_path(strim(path), BLK_OPEN_READ, NULL, NULL);
+ if (IS_ERR(bdev_file))
goto out_free_sb;
- err = "failed to set blocksize";
- if (set_blocksize(bdev, 4096))
- goto out_blkdev_put;
-
- err = read_super(sb, bdev, &sb_disk);
+ err = read_super(sb, file_bdev(bdev_file), &sb_disk);
if (err)
goto out_blkdev_put;
@@ -2563,17 +2591,17 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
if (!holder) {
ret = -ENOMEM;
err = "cannot allocate memory";
- goto out_put_sb_page;
+ goto out_put_sb_folio;
}
/* Now reopen in exclusive mode with proper holder */
- bdev2 = blkdev_get_by_dev(bdev->bd_dev, BLK_OPEN_READ | BLK_OPEN_WRITE,
- holder, NULL);
- blkdev_put(bdev, NULL);
- bdev = bdev2;
- if (IS_ERR(bdev)) {
- ret = PTR_ERR(bdev);
- bdev = NULL;
+ bdev_file2 = bdev_file_open_by_dev(file_bdev(bdev_file)->bd_dev,
+ BLK_OPEN_READ | BLK_OPEN_WRITE, holder, NULL);
+ fput(bdev_file);
+ bdev_file = bdev_file2;
+ if (IS_ERR(bdev_file)) {
+ ret = PTR_ERR(bdev_file);
+ bdev_file = NULL;
if (ret == -EBUSY) {
dev_t dev;
@@ -2608,7 +2636,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
args->path = path;
args->sb = sb;
args->sb_disk = sb_disk;
- args->bdev = bdev;
+ args->bdev_file = bdev_file;
args->holder = holder;
register_device_async(args);
/* No wait and returns to user space */
@@ -2617,14 +2645,14 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
if (SB_IS_BDEV(sb)) {
mutex_lock(&bch_register_lock);
- ret = register_bdev(sb, sb_disk, bdev, holder);
+ ret = register_bdev(sb, sb_disk, bdev_file, holder);
mutex_unlock(&bch_register_lock);
/* blkdev_put() will be called in cached_dev_free() */
if (ret < 0)
goto out_free_sb;
} else {
/* blkdev_put() will be called in bch_cache_release() */
- ret = register_cache(sb, sb_disk, bdev, holder);
+ ret = register_cache(sb, sb_disk, bdev_file, holder);
if (ret)
goto out_free_sb;
}
@@ -2637,11 +2665,11 @@ async_done:
out_free_holder:
kfree(holder);
-out_put_sb_page:
- put_page(virt_to_page(sb_disk));
+out_put_sb_folio:
+ folio_put(virt_to_folio(sb_disk));
out_blkdev_put:
- if (bdev)
- blkdev_put(bdev, holder);
+ if (bdev_file)
+ fput(bdev_file);
out_free_sb:
kfree(sb);
out_free_path:
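The register path above now keeps the device open through a struct file instead of a bare block_device. The open/use/release pattern is roughly as follows (a sketch with a placeholder path; holder and error handling trimmed):

struct file *bdev_file;
struct block_device *bdev;

bdev_file = bdev_file_open_by_path("/dev/example", BLK_OPEN_READ, NULL, NULL);
if (IS_ERR(bdev_file))
	return PTR_ERR(bdev_file);

bdev = file_bdev(bdev_file);	/* the underlying block_device */
/* ... read_super(), register_bdev()/register_cache(), ... */
fput(bdev_file);		/* releases the open; replaces blkdev_put() */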
@@ -2875,24 +2903,25 @@ static int __init bcache_init(void)
if (bch_btree_init())
goto err;
- bcache_wq = alloc_workqueue("bcache", WQ_MEM_RECLAIM, 0);
+ bcache_wq = alloc_workqueue("bcache", WQ_MEM_RECLAIM | WQ_PERCPU, 0);
if (!bcache_wq)
goto err;
/*
* Let's not make this `WQ_MEM_RECLAIM` for the following reasons:
*
- * 1. It used `system_wq` before which also does no memory reclaim.
+ * 1. It used `system_percpu_wq` before which also does no memory reclaim.
* 2. With `WQ_MEM_RECLAIM` desktop stalls, increased boot times, and
* reduced throughput can be observed.
*
- * We still want to user our own queue to not congest the `system_wq`.
+ * We still want to use our own queue so we don't congest the `system_percpu_wq`.
*/
- bch_flush_wq = alloc_workqueue("bch_flush", 0, 0);
+ bch_flush_wq = alloc_workqueue("bch_flush", WQ_PERCPU, 0);
if (!bch_flush_wq)
goto err;
- bch_journal_wq = alloc_workqueue("bch_journal", WQ_MEM_RECLAIM, 0);
+ bch_journal_wq = alloc_workqueue("bch_journal",
+ WQ_MEM_RECLAIM | WQ_PERCPU, 0);
if (!bch_journal_wq)
goto err;
@@ -2905,7 +2934,6 @@ static int __init bcache_init(void)
goto err;
bch_debug_init();
- closure_debug_init();
bcache_is_reboot = false;