Diffstat (limited to 'drivers/md/bcache/super.c')
-rw-r--r--   drivers/md/bcache/super.c   346
1 file changed, 187 insertions(+), 159 deletions(-)
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 0ae2b3676293..c17d4517af22 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -168,14 +168,14 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev,
 {
         const char *err;
         struct cache_sb_disk *s;
-        struct page *page;
+        struct folio *folio;
         unsigned int i;
 
-        page = read_cache_page_gfp(bdev->bd_inode->i_mapping,
-                                   SB_OFFSET >> PAGE_SHIFT, GFP_KERNEL);
-        if (IS_ERR(page))
+        folio = mapping_read_folio_gfp(bdev->bd_mapping,
+                                       SB_OFFSET >> PAGE_SHIFT, GFP_KERNEL);
+        if (IS_ERR(folio))
                 return "IO error";
-        s = page_address(page) + offset_in_page(SB_OFFSET);
+        s = folio_address(folio) + offset_in_folio(folio, SB_OFFSET);
 
         sb->offset = le64_to_cpu(s->offset);
         sb->version = le64_to_cpu(s->version);
@@ -272,7 +272,7 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev,
         *res = s;
         return NULL;
 err:
-        put_page(page);
+        folio_put(folio);
         return err;
 }
 
@@ -293,8 +293,7 @@ static void __write_super(struct cache_sb *sb, struct cache_sb_disk *out,
         bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_META;
         bio->bi_iter.bi_sector = SB_SECTOR;
-        __bio_add_page(bio, virt_to_page(out), SB_SIZE,
-                       offset_in_page(out));
+        bio_add_virt_nofail(bio, out, SB_SIZE);
 
         out->offset = cpu_to_le64(sb->offset);
@@ -327,9 +326,9 @@ static void __write_super(struct cache_sb *sb, struct cache_sb_disk *out,
         submit_bio(bio);
 }
 
-static void bch_write_bdev_super_unlock(struct closure *cl)
+static CLOSURE_CALLBACK(bch_write_bdev_super_unlock)
 {
-        struct cached_dev *dc = container_of(cl, struct cached_dev, sb_write);
+        closure_type(dc, struct cached_dev, sb_write);
 
         up(&dc->sb_write_mutex);
 }
@@ -363,9 +362,9 @@ static void write_super_endio(struct bio *bio)
         closure_put(&ca->set->sb_write);
 }
 
-static void bcache_write_super_unlock(struct closure *cl)
+static CLOSURE_CALLBACK(bcache_write_super_unlock)
 {
-        struct cache_set *c = container_of(cl, struct cache_set, sb_write);
+        closure_type(c, struct cache_set, sb_write);
 
         up(&c->sb_write_mutex);
 }
@@ -407,9 +406,9 @@ static void uuid_endio(struct bio *bio)
         closure_put(cl);
 }
 
-static void uuid_io_unlock(struct closure *cl)
+static CLOSURE_CALLBACK(uuid_io_unlock)
 {
-        struct cache_set *c = container_of(cl, struct cache_set, uuid_write);
+        closure_type(c, struct cache_set, uuid_write);
 
         up(&c->uuid_write_mutex);
 }
@@ -546,7 +545,8 @@ static struct uuid_entry *uuid_find(struct cache_set *c, const char *uuid)
 
 static struct uuid_entry *uuid_find_empty(struct cache_set *c)
 {
-        static const char zero_uuid[16] = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
+        static const char zero_uuid[16] __nonstring =
+                { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
 
         return uuid_find(c, zero_uuid);
 }
@@ -881,8 +881,8 @@ static void bcache_device_free(struct bcache_device *d)
                 bcache_device_detach(d);
 
         if (disk) {
-                ida_simple_remove(&bcache_device_idx,
-                                  first_minor_to_idx(disk->first_minor));
+                ida_free(&bcache_device_idx,
+                         first_minor_to_idx(disk->first_minor));
                 put_disk(disk);
         }
 
@@ -897,14 +897,30 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
                               sector_t sectors, struct block_device *cached_bdev,
                               const struct block_device_operations *ops)
 {
-        struct request_queue *q;
         const size_t max_stripes = min_t(size_t, INT_MAX,
                                          SIZE_MAX / sizeof(atomic_t));
+        struct queue_limits lim = {
+                .max_hw_sectors         = UINT_MAX,
+                .max_sectors            = UINT_MAX,
+                .max_segment_size       = UINT_MAX,
+                .max_segments           = BIO_MAX_VECS,
+                .max_hw_discard_sectors = UINT_MAX,
+                .io_min                 = block_size,
+                .logical_block_size     = block_size,
+                .physical_block_size    = block_size,
+                .features               = BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA,
+        };
         uint64_t n;
         int idx;
 
+        if (cached_bdev) {
+                d->stripe_size = bdev_io_opt(cached_bdev) >> SECTOR_SHIFT;
+                lim.io_opt = umax(block_size, bdev_io_opt(cached_bdev));
+        }
         if (!d->stripe_size)
                 d->stripe_size = 1 << 31;
+        else if (d->stripe_size < BCH_MIN_STRIPE_SZ)
+                d->stripe_size = roundup(BCH_MIN_STRIPE_SZ, d->stripe_size);
 
         n = DIV_ROUND_UP_ULL(sectors, d->stripe_size);
         if (!n || n > max_stripes) {
@@ -924,8 +940,8 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
         if (!d->full_dirty_stripes)
                 goto out_free_stripe_sectors_dirty;
 
-        idx = ida_simple_get(&bcache_device_idx, 0,
-                             BCACHE_DEVICE_IDX_MAX, GFP_KERNEL);
+        idx = ida_alloc_max(&bcache_device_idx, BCACHE_DEVICE_IDX_MAX - 1,
+                            GFP_KERNEL);
         if (idx < 0)
                 goto out_free_full_dirty_stripes;
 
@@ -933,53 +949,37 @@
                         BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER))
                 goto out_ida_remove;
 
-        d->disk = blk_alloc_disk(NUMA_NO_NODE);
-        if (!d->disk)
-                goto out_bioset_exit;
-
-        set_capacity(d->disk, sectors);
-        snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", idx);
-
-        d->disk->major          = bcache_major;
-        d->disk->first_minor    = idx_to_first_minor(idx);
-        d->disk->minors         = BCACHE_MINORS;
-        d->disk->fops           = ops;
-        d->disk->private_data   = d;
-
-        q = d->disk->queue;
-        q->limits.max_hw_sectors        = UINT_MAX;
-        q->limits.max_sectors           = UINT_MAX;
-        q->limits.max_segment_size      = UINT_MAX;
-        q->limits.max_segments          = BIO_MAX_VECS;
-        blk_queue_max_discard_sectors(q, UINT_MAX);
-        q->limits.discard_granularity   = 512;
-        q->limits.io_min                = block_size;
-        q->limits.logical_block_size    = block_size;
-        q->limits.physical_block_size   = block_size;
-
-        if (q->limits.logical_block_size > PAGE_SIZE && cached_bdev) {
+        if (lim.logical_block_size > PAGE_SIZE && cached_bdev) {
                 /*
                  * This should only happen with BCACHE_SB_VERSION_BDEV.
                  * Block/page size is checked for BCACHE_SB_VERSION_CDEV.
                  */
-                pr_info("%s: sb/logical block size (%u) greater than page size (%lu) falling back to device logical block size (%u)\n",
-                        d->disk->disk_name, q->limits.logical_block_size,
+                pr_info("bcache%i: sb/logical block size (%u) greater than page size (%lu) falling back to device logical block size (%u)\n",
                        idx, lim.logical_block_size,
                         PAGE_SIZE, bdev_logical_block_size(cached_bdev));
 
                 /* This also adjusts physical block size/min io size if needed */
-                blk_queue_logical_block_size(q, bdev_logical_block_size(cached_bdev));
+                lim.logical_block_size = bdev_logical_block_size(cached_bdev);
         }
 
-        blk_queue_flag_set(QUEUE_FLAG_NONROT, d->disk->queue);
+        d->disk = blk_alloc_disk(&lim, NUMA_NO_NODE);
+        if (IS_ERR(d->disk))
+                goto out_bioset_exit;
 
-        blk_queue_write_cache(q, true, true);
+        set_capacity(d->disk, sectors);
+        snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", idx);
+        d->disk->major          = bcache_major;
+        d->disk->first_minor    = idx_to_first_minor(idx);
+        d->disk->minors         = BCACHE_MINORS;
+        d->disk->fops           = ops;
+        d->disk->private_data   = d;
 
         return 0;
 
 out_bioset_exit:
         bioset_exit(&d->bio_split);
 out_ida_remove:
-        ida_simple_remove(&bcache_device_idx, idx);
+        ida_free(&bcache_device_idx, idx);
 out_free_full_dirty_stripes:
         kvfree(d->full_dirty_stripes);
 out_free_stripe_sectors_dirty:
@@ -1342,9 +1342,9 @@ void bch_cached_dev_release(struct kobject *kobj)
         module_put(THIS_MODULE);
 }
 
-static void cached_dev_free(struct closure *cl)
+static CLOSURE_CALLBACK(cached_dev_free)
 {
-        struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
+        closure_type(dc, struct cached_dev, disk.cl);
 
         if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
                 cancel_writeback_rate_update_dwork(dc);
@@ -1366,19 +1366,19 @@ static void cached_dev_free(struct closure *cl)
         mutex_unlock(&bch_register_lock);
 
         if (dc->sb_disk)
-                put_page(virt_to_page(dc->sb_disk));
+                folio_put(virt_to_folio(dc->sb_disk));
 
-        if (!IS_ERR_OR_NULL(dc->bdev))
-                blkdev_put(dc->bdev, dc);
+        if (dc->bdev_file)
+                fput(dc->bdev_file);
 
         wake_up(&unregister_wait);
 
         kobject_put(&dc->disk.kobj);
 }
 
-static void cached_dev_flush(struct closure *cl)
+static CLOSURE_CALLBACK(cached_dev_flush)
 {
-        struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
+        closure_type(dc, struct cached_dev, disk.cl);
         struct bcache_device *d = &dc->disk;
 
         mutex_lock(&bch_register_lock);
@@ -1388,7 +1388,7 @@ static void cached_dev_flush(struct closure *cl)
         bch_cache_accounting_destroy(&dc->accounting);
         kobject_del(&d->kobj);
 
-        continue_at(cl, cached_dev_free, system_wq);
+        continue_at(cl, cached_dev_free, system_percpu_wq);
 }
 
 static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
@@ -1400,7 +1400,7 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
         __module_get(THIS_MODULE);
         INIT_LIST_HEAD(&dc->list);
         closure_init(&dc->disk.cl, NULL);
-        set_closure_fn(&dc->disk.cl, cached_dev_flush, system_wq);
+        set_closure_fn(&dc->disk.cl, cached_dev_flush, system_percpu_wq);
         kobject_init(&dc->disk.kobj, &bch_cached_dev_ktype);
         INIT_WORK(&dc->detach, cached_dev_detach_finish);
         sema_init(&dc->sb_write_mutex, 1);
@@ -1415,11 +1415,9 @@
                 hlist_add_head(&io->hash, dc->io_hash + RECENT_IO);
         }
 
-        dc->disk.stripe_size = q->limits.io_opt >> 9;
-
-        if (dc->disk.stripe_size)
-                dc->partial_stripes_expensive =
-                        q->limits.raid_partial_stripes_expensive;
+        if (bdev_io_opt(dc->bdev))
+                dc->partial_stripes_expensive = !!(q->limits.features &
+                        BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE);
 
         ret = bcache_device_init(&dc->disk, block_size,
                          bdev_nr_sectors(dc->bdev) - dc->sb.data_offset,
@@ -1427,9 +1425,6 @@
         if (ret)
                 return ret;
 
-        blk_queue_io_opt(dc->disk.disk->queue,
-                max(queue_io_opt(dc->disk.disk->queue), queue_io_opt(q)));
-
         atomic_set(&dc->io_errors, 0);
         dc->io_disable = false;
         dc->error_limit = DEFAULT_CACHED_DEV_ERROR_LIMIT;
@@ -1444,7 +1439,7 @@
 /* Cached device - bcache superblock */
 
 static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
-                                struct block_device *bdev,
+                                struct file *bdev_file,
                                 struct cached_dev *dc)
 {
         const char *err = "cannot allocate memory";
@@ -1452,14 +1447,15 @@ static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
         int ret = -ENOMEM;
 
         memcpy(&dc->sb, sb, sizeof(struct cache_sb));
-        dc->bdev = bdev;
+        dc->bdev_file = bdev_file;
+        dc->bdev = file_bdev(bdev_file);
         dc->sb_disk = sb_disk;
 
         if (cached_dev_init(dc, sb->block_size << 9))
                 goto err;
 
         err = "error creating kobject";
-        if (kobject_add(&dc->disk.kobj, bdev_kobj(bdev), "bcache"))
+        if (kobject_add(&dc->disk.kobj, bdev_kobj(dc->bdev), "bcache"))
                 goto err;
         if (bch_cache_accounting_add_kobjs(&dc->accounting, &dc->disk.kobj))
                 goto err;
@@ -1496,9 +1492,9 @@ void bch_flash_dev_release(struct kobject *kobj)
         kfree(d);
 }
 
-static void flash_dev_free(struct closure *cl)
+static CLOSURE_CALLBACK(flash_dev_free)
 {
-        struct bcache_device *d = container_of(cl, struct bcache_device, cl);
+        closure_type(d, struct bcache_device, cl);
 
         mutex_lock(&bch_register_lock);
         atomic_long_sub(bcache_dev_sectors_dirty(d),
@@ -1509,15 +1505,15 @@ static void flash_dev_free(struct closure *cl)
         kobject_put(&d->kobj);
 }
 
-static void flash_dev_flush(struct closure *cl)
+static CLOSURE_CALLBACK(flash_dev_flush)
 {
-        struct bcache_device *d = container_of(cl, struct bcache_device, cl);
+        closure_type(d, struct bcache_device, cl);
 
         mutex_lock(&bch_register_lock);
         bcache_device_unlink(d);
         mutex_unlock(&bch_register_lock);
         kobject_del(&d->kobj);
 
-        continue_at(cl, flash_dev_free, system_wq);
+        continue_at(cl, flash_dev_free, system_percpu_wq);
 }
 
@@ -1529,7 +1525,7 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u)
                 goto err_ret;
 
         closure_init(&d->cl, NULL);
-        set_closure_fn(&d->cl, flash_dev_flush, system_wq);
+        set_closure_fn(&d->cl, flash_dev_flush, system_percpu_wq);
 
         kobject_init(&d->kobj, &bch_flash_dev_ktype);
 
@@ -1667,9 +1663,9 @@ void bch_cache_set_release(struct kobject *kobj)
         module_put(THIS_MODULE);
 }
 
-static void cache_set_free(struct closure *cl)
+static CLOSURE_CALLBACK(cache_set_free)
 {
-        struct cache_set *c = container_of(cl, struct cache_set, cl);
+        closure_type(c, struct cache_set, cl);
         struct cache *ca;
 
         debugfs_remove(c->debug);
@@ -1708,9 +1704,9 @@ static void cache_set_free(struct closure *cl)
         kobject_put(&c->kobj);
 }
 
-static void cache_set_flush(struct closure *cl)
+static CLOSURE_CALLBACK(cache_set_flush)
 {
-        struct cache_set *c = container_of(cl, struct cache_set, caching);
+        closure_type(c, struct cache_set, caching);
         struct cache *ca = c->cache;
         struct btree *b;
 
@@ -1722,7 +1718,7 @@ static void cache_set_flush(struct closure *cl)
         if (!IS_ERR_OR_NULL(c->gc_thread))
                 kthread_stop(c->gc_thread);
 
-        if (!IS_ERR(c->root))
+        if (!IS_ERR_OR_NULL(c->root))
                 list_add(&c->root->list, &c->btree_cache);
 
         /*
@@ -1737,7 +1733,12 @@ static void cache_set_flush(struct closure *cl)
                         mutex_unlock(&b->write_lock);
                 }
 
-        if (ca->alloc_thread)
+        /*
+         * If the register_cache_set() call to bch_cache_set_alloc() failed,
+         * ca has not been assigned a value and return error.
+         * So we need check ca is not NULL during bch_cache_set_unregister().
+         */
+        if (ca && ca->alloc_thread)
                 kthread_stop(ca->alloc_thread);
 
         if (c->journal.cur) {
@@ -1805,9 +1806,9 @@ static void conditional_stop_bcache_device(struct cache_set *c,
         }
 }
 
-static void __cache_set_unregister(struct closure *cl)
+static CLOSURE_CALLBACK(__cache_set_unregister)
 {
-        struct cache_set *c = container_of(cl, struct cache_set, caching);
+        closure_type(c, struct cache_set, caching);
         struct cached_dev *dc;
         struct bcache_device *d;
         size_t i;
@@ -1832,7 +1833,7 @@ static void __cache_set_unregister(struct closure *cl)
 
         mutex_unlock(&bch_register_lock);
 
-        continue_at(cl, cache_set_flush, system_wq);
+        continue_at(cl, cache_set_flush, system_percpu_wq);
 }
 
 void bch_cache_set_stop(struct cache_set *c)
@@ -1862,10 +1863,10 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
 
         __module_get(THIS_MODULE);
         closure_init(&c->cl, NULL);
-        set_closure_fn(&c->cl, cache_set_free, system_wq);
+        set_closure_fn(&c->cl, cache_set_free, system_percpu_wq);
 
         closure_init(&c->caching, &c->cl);
-        set_closure_fn(&c->caching, __cache_set_unregister, system_wq);
+        set_closure_fn(&c->caching, __cache_set_unregister, system_percpu_wq);
 
         /* Maybe create continue_at_noreturn() and use it here? */
         closure_set_stopped(&c->cl);
@@ -1911,8 +1912,9 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
         INIT_LIST_HEAD(&c->btree_cache_freed);
         INIT_LIST_HEAD(&c->data_buckets);
 
-        iter_size = ((meta_bucket_pages(sb) * PAGE_SECTORS) / sb->block_size + 1) *
-                sizeof(struct btree_iter_set);
+        iter_size = sizeof(struct btree_iter) +
+                    ((meta_bucket_pages(sb) * PAGE_SECTORS) / sb->block_size) *
+                    sizeof(struct btree_iter_set);
 
         c->devices = kcalloc(c->nr_uuids, sizeof(void *), GFP_KERNEL);
         if (!c->devices)
@@ -1937,7 +1939,8 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
         if (!c->uuids)
                 goto err;
 
-        c->moving_gc_wq = alloc_workqueue("bcache_gc", WQ_MEM_RECLAIM, 0);
+        c->moving_gc_wq = alloc_workqueue("bcache_gc",
+                                          WQ_MEM_RECLAIM | WQ_PERCPU, 0);
         if (!c->moving_gc_wq)
                 goto err;
 
@@ -2015,7 +2018,7 @@ static int run_cache_set(struct cache_set *c)
                 c->root = bch_btree_node_get(c, NULL, k,
                                              j->btree_level,
                                              true, NULL);
-                if (IS_ERR_OR_NULL(c->root))
+                if (IS_ERR(c->root))
                         goto err;
 
                 list_del_init(&c->root->list);
@@ -2214,10 +2217,10 @@ void bch_cache_release(struct kobject *kobj)
                 free_fifo(&ca->free[i]);
 
         if (ca->sb_disk)
-                put_page(virt_to_page(ca->sb_disk));
+                folio_put(virt_to_folio(ca->sb_disk));
 
-        if (!IS_ERR_OR_NULL(ca->bdev))
-                blkdev_put(ca->bdev, ca);
+        if (ca->bdev_file)
+                fput(ca->bdev_file);
 
         kfree(ca);
         module_put(THIS_MODULE);
@@ -2234,18 +2237,50 @@ static int cache_alloc(struct cache *ca)
         __module_get(THIS_MODULE);
         kobject_init(&ca->kobj, &bch_cache_ktype);
 
-        bio_init(&ca->journal.bio, NULL, ca->journal.bio.bi_inline_vecs, 8, 0);
+        bio_init_inline(&ca->journal.bio, NULL, 8, 0);
 
         /*
-         * when ca->sb.njournal_buckets is not zero, journal exists,
-         * and in bch_journal_replay(), tree node may split,
-         * so bucket of RESERVE_BTREE type is needed,
-         * the worst situation is all journal buckets are valid journal,
-         * and all the keys need to replay,
-         * so the number of RESERVE_BTREE type buckets should be as much
-         * as journal buckets
+         * When the cache disk is first registered, ca->sb.njournal_buckets
+         * is zero, and it is assigned in run_cache_set().
+         *
+         * When ca->sb.njournal_buckets is not zero, journal exists,
+         * and in bch_journal_replay(), tree node may split.
+         * The worst situation is all journal buckets are valid journal,
+         * and all the keys need to replay, so the number of RESERVE_BTREE
+         * type buckets should be as much as journal buckets.
+         *
+         * If the number of RESERVE_BTREE type buckets is too few, the
+         * bch_allocator_thread() may hang up and unable to allocate
+         * bucket. The situation is roughly as follows:
+         *
+         * 1. In bch_data_insert_keys(), if the operation is not op->replace,
+         *    it will call the bch_journal(), which increments the journal_ref
+         *    counter. This counter is only decremented after bch_btree_insert
+         *    completes.
+         *
+         * 2. When calling bch_btree_insert, if the btree needs to split,
+         *    it will call btree_split() and btree_check_reserve() to check
+         *    whether there are enough reserved buckets in the RESERVE_BTREE
+         *    slot. If not enough, bcache_btree_root() will repeatedly retry.
+         *
+         * 3. Normally, the bch_allocator_thread is responsible for filling
+         *    the reservation slots from the free_inc bucket list. When the
+         *    free_inc bucket list is exhausted, the bch_allocator_thread
+         *    will call invalidate_buckets() until free_inc is refilled.
+         *    Then bch_allocator_thread calls bch_prio_write() once. and
+         *    bch_prio_write() will call bch_journal_meta() and waits for
+         *    the journal write to complete.
+         *
+         * 4. During journal_write, journal_write_unlocked() is be called.
+         *    If journal full occurs, journal_reclaim() and btree_flush_write()
+         *    will be called sequentially, then retry journal_write.
+         *
+         * 5. When 2 and 4 occur together, IO will hung up and cannot recover.
+         *
+         * Therefore, reserve more RESERVE_BTREE type buckets.
          */
-        btree_buckets = ca->sb.njournal_buckets ?: 8;
+        btree_buckets = clamp_t(size_t, ca->sb.nbuckets >> 7,
+                                32, SB_JOURNAL_BUCKETS);
         free = roundup_pow_of_two(ca->sb.nbuckets) >> 10;
 
         if (!free) {
                 ret = -EPERM;
@@ -2337,38 +2372,39 @@ err_free:
 }
 
 static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
-                                struct block_device *bdev, struct cache *ca)
+                                struct file *bdev_file,
+                                struct cache *ca)
 {
         const char *err = NULL; /* must be set for any error case */
         int ret = 0;
 
         memcpy(&ca->sb, sb, sizeof(struct cache_sb));
-        ca->bdev = bdev;
+        ca->bdev_file = bdev_file;
+        ca->bdev = file_bdev(bdev_file);
         ca->sb_disk = sb_disk;
 
-        if (bdev_max_discard_sectors((bdev)))
-                ca->discard = CACHE_DISCARD(&ca->sb);
-
         ret = cache_alloc(ca);
         if (ret != 0) {
-                /*
-                 * If we failed here, it means ca->kobj is not initialized yet,
-                 * kobject_put() won't be called and there is no chance to
-                 * call blkdev_put() to bdev in bch_cache_release(). So we
-                 * explicitly call blkdev_put() here.
-                 */
-                blkdev_put(bdev, ca);
                 if (ret == -ENOMEM)
                         err = "cache_alloc(): -ENOMEM";
                 else if (ret == -EPERM)
                         err = "cache_alloc(): cache device is too small";
                 else
                         err = "cache_alloc(): unknown error";
-                goto err;
+                pr_notice("error %pg: %s\n", file_bdev(bdev_file), err);
+                /*
+                 * If we failed here, it means ca->kobj is not initialized yet,
+                 * kobject_put() won't be called and there is no chance to
+                 * call fput() to bdev in bch_cache_release(). So
+                 * we explicitly call fput() on the block device here.
+                 */
+                fput(bdev_file);
+                return ret;
         }
 
-        if (kobject_add(&ca->kobj, bdev_kobj(bdev), "bcache")) {
-                err = "error calling kobject_add";
+        if (kobject_add(&ca->kobj, bdev_kobj(file_bdev(bdev_file)), "bcache")) {
+                pr_notice("error %pg: error calling kobject_add\n",
+                          file_bdev(bdev_file));
                 ret = -ENOMEM;
                 goto out;
         }
@@ -2382,15 +2418,10 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
                 goto out;
         }
 
-        pr_info("registered cache device %pg\n", ca->bdev);
+        pr_info("registered cache device %pg\n", file_bdev(ca->bdev_file));
 
 out:
         kobject_put(&ca->kobj);
-
-err:
-        if (err)
-                pr_notice("error %pg: %s\n", ca->bdev, err);
-
         return ret;
 }
 
@@ -2445,7 +2476,7 @@ struct async_reg_args {
         char *path;
         struct cache_sb *sb;
         struct cache_sb_disk *sb_disk;
-        struct block_device *bdev;
+        struct file *bdev_file;
         void *holder;
 };
 
@@ -2456,8 +2487,8 @@ static void register_bdev_worker(struct work_struct *work)
                 container_of(work, struct async_reg_args, reg_work.work);
 
         mutex_lock(&bch_register_lock);
-        if (register_bdev(args->sb, args->sb_disk, args->bdev, args->holder)
-            < 0)
+        if (register_bdev(args->sb, args->sb_disk, args->bdev_file,
                           args->holder) < 0)
                 fail = true;
         mutex_unlock(&bch_register_lock);
 
@@ -2477,7 +2508,8 @@ static void register_cache_worker(struct work_struct *work)
                 container_of(work, struct async_reg_args, reg_work.work);
 
         /* blkdev_put() will be called in bch_cache_release() */
-        if (register_cache(args->sb, args->sb_disk, args->bdev, args->holder))
+        if (register_cache(args->sb, args->sb_disk, args->bdev_file,
+                           args->holder))
                 fail = true;
 
         if (fail)
@@ -2497,7 +2529,7 @@ static void register_device_async(struct async_reg_args *args)
                 INIT_DELAYED_WORK(&args->reg_work, register_cache_worker);
 
         /* 10 jiffies is enough for a delay */
-        queue_delayed_work(system_wq, &args->reg_work, 10);
+        queue_delayed_work(system_percpu_wq, &args->reg_work, 10);
 }
 
 static void *alloc_holder_object(struct cache_sb *sb)
@@ -2514,7 +2546,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
         char *path = NULL;
         struct cache_sb *sb;
         struct cache_sb_disk *sb_disk;
-        struct block_device *bdev, *bdev2;
+        struct file *bdev_file, *bdev_file2;
         void *holder = NULL;
         ssize_t ret;
         bool async_registration = false;
@@ -2547,15 +2579,11 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
         ret = -EINVAL;
         err = "failed to open device";
-        bdev = blkdev_get_by_path(strim(path), BLK_OPEN_READ, NULL, NULL);
-        if (IS_ERR(bdev))
+        bdev_file = bdev_file_open_by_path(strim(path), BLK_OPEN_READ, NULL, NULL);
+        if (IS_ERR(bdev_file))
                 goto out_free_sb;
 
-        err = "failed to set blocksize";
-        if (set_blocksize(bdev, 4096))
-                goto out_blkdev_put;
-
-        err = read_super(sb, bdev, &sb_disk);
+        err = read_super(sb, file_bdev(bdev_file), &sb_disk);
         if (err)
                 goto out_blkdev_put;
 
@@ -2563,17 +2591,17 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
         if (!holder) {
                 ret = -ENOMEM;
                 err = "cannot allocate memory";
-                goto out_put_sb_page;
+                goto out_put_sb_folio;
         }
 
         /* Now reopen in exclusive mode with proper holder */
-        bdev2 = blkdev_get_by_dev(bdev->bd_dev, BLK_OPEN_READ | BLK_OPEN_WRITE,
-                                  holder, NULL);
-        blkdev_put(bdev, NULL);
-        bdev = bdev2;
-        if (IS_ERR(bdev)) {
-                ret = PTR_ERR(bdev);
-                bdev = NULL;
+        bdev_file2 = bdev_file_open_by_dev(file_bdev(bdev_file)->bd_dev,
+                        BLK_OPEN_READ | BLK_OPEN_WRITE, holder, NULL);
+        fput(bdev_file);
+        bdev_file = bdev_file2;
+        if (IS_ERR(bdev_file)) {
+                ret = PTR_ERR(bdev_file);
+                bdev_file = NULL;
                 if (ret == -EBUSY) {
                         dev_t dev;
 
@@ -2608,7 +2636,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
                 args->path      = path;
                 args->sb        = sb;
                 args->sb_disk   = sb_disk;
-                args->bdev      = bdev;
+                args->bdev_file = bdev_file;
                 args->holder    = holder;
                 register_device_async(args);
                 /* No wait and returns to user space */
@@ -2617,14 +2645,14 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
 
         if (SB_IS_BDEV(sb)) {
                 mutex_lock(&bch_register_lock);
-                ret = register_bdev(sb, sb_disk, bdev, holder);
+                ret = register_bdev(sb, sb_disk, bdev_file, holder);
                 mutex_unlock(&bch_register_lock);
                 /* blkdev_put() will be called in cached_dev_free() */
                 if (ret < 0)
                         goto out_free_sb;
         } else {
                 /* blkdev_put() will be called in bch_cache_release() */
-                ret = register_cache(sb, sb_disk, bdev, holder);
+                ret = register_cache(sb, sb_disk, bdev_file, holder);
                 if (ret)
                         goto out_free_sb;
         }
@@ -2637,11 +2665,11 @@ async_done:
 
 out_free_holder:
         kfree(holder);
-out_put_sb_page:
-        put_page(virt_to_page(sb_disk));
+out_put_sb_folio:
        folio_put(virt_to_folio(sb_disk));
 out_blkdev_put:
-        if (bdev)
-                blkdev_put(bdev, holder);
+        if (bdev_file)
+                fput(bdev_file);
 out_free_sb:
         kfree(sb);
 out_free_path:
@@ -2875,24 +2903,25 @@ static int __init bcache_init(void)
         if (bch_btree_init())
                 goto err;
 
-        bcache_wq = alloc_workqueue("bcache", WQ_MEM_RECLAIM, 0);
+        bcache_wq = alloc_workqueue("bcache", WQ_MEM_RECLAIM | WQ_PERCPU, 0);
         if (!bcache_wq)
                 goto err;
 
         /*
          * Let's not make this `WQ_MEM_RECLAIM` for the following reasons:
          *
-         * 1. It used `system_wq` before which also does no memory reclaim.
+         * 1. It used `system_percpu_wq` before which also does no memory reclaim.
         * 2. With `WQ_MEM_RECLAIM` desktop stalls, increased boot times, and
          *    reduced throughput can be observed.
          *
-         * We still want to user our own queue to not congest the `system_wq`.
+         * We still want to user our own queue to not congest the `system_percpu_wq`.
         */
-        bch_flush_wq = alloc_workqueue("bch_flush", 0, 0);
+        bch_flush_wq = alloc_workqueue("bch_flush", WQ_PERCPU, 0);
         if (!bch_flush_wq)
                 goto err;
 
-        bch_journal_wq = alloc_workqueue("bch_journal", WQ_MEM_RECLAIM, 0);
+        bch_journal_wq = alloc_workqueue("bch_journal",
                                         WQ_MEM_RECLAIM | WQ_PERCPU, 0);
         if (!bch_journal_wq)
                 goto err;
 
@@ -2905,7 +2934,6 @@ static int __init bcache_init(void)
                 goto err;
 
         bch_debug_init();
-        closure_debug_init();
         bcache_is_reboot = false;
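
Editor's note on the recurring CLOSURE_CALLBACK() hunks above: each one makes the same mechanical change — the callback used to be declared as taking a struct closure * even though it was really invoked through a work_struct pointer, and the conversion makes it take the work_struct * directly and recover the enclosing object with container_of(). The following is a minimal userspace sketch of that shape, assuming simplified macro bodies; work_struct, closure, and cached_dev here are stand-ins, not the kernel definitions (see include/linux/closure.h for the real ones).

/*
 * Userspace sketch of the CLOSURE_CALLBACK()/closure_type() pattern.
 * All types and macro bodies are simplified stand-ins, not kernel code.
 */
#include <stdio.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct work_struct { int pending; };         /* mock of the kernel type */
struct closure { struct work_struct work; }; /* a closure embeds a work item */

/*
 * The callback is declared with the signature it is actually called with
 * (a work_struct *), and closure_type() walks back to the enclosing object
 * through member.work — no cast from work_struct * to closure * is needed.
 */
#define CLOSURE_CALLBACK(name) void name(struct work_struct *ws)
#define closure_type(name, type, member) \
        type *name = container_of(ws, type, member.work)

struct cached_dev {                /* mock: the real one lives in bcache.h */
        struct closure sb_write;
        int sb_write_mutex;        /* stand-in for the semaphore */
};

static CLOSURE_CALLBACK(bch_write_bdev_super_unlock)
{
        closure_type(dc, struct cached_dev, sb_write);

        dc->sb_write_mutex = 0;    /* up(&dc->sb_write_mutex) in the kernel */
        printf("recovered cached_dev at %p\n", (void *)dc);
}

int main(void)
{
        struct cached_dev dc = { .sb_write_mutex = 1 };

        /* a workqueue would hand the callback the embedded work_struct */
        bch_write_bdev_super_unlock(&dc.sb_write.work);
        return 0;
}

The point of the conversion is visible in the macros: the old functions claimed a struct closure * parameter while being invoked via a work_struct *, and removing that type punning is exactly what the CLOSURE_CALLBACK() series does.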
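A second note, on the RESERVE_BTREE sizing change in the cache_alloc() hunk: the old reserve was njournal_buckets ?: 8, while the new code clamps nbuckets >> 7 between 32 and SB_JOURNAL_BUCKETS. A quick userspace check of what the new formula yields — SB_JOURNAL_BUCKETS is 256U in include/uapi/linux/bcache.h, and the bucket counts below are made-up examples:

/* Sketch: evaluate the new RESERVE_BTREE sizing for sample bucket counts. */
#include <stdio.h>
#include <stddef.h>

#define SB_JOURNAL_BUCKETS 256U

/* userspace stand-in for the kernel's clamp_t() */
static size_t clamp_size(size_t v, size_t lo, size_t hi)
{
        return v < lo ? lo : (v > hi ? hi : v);
}

int main(void)
{
        /* hypothetical cache sizes, expressed as bucket counts */
        size_t nbuckets[] = { 1024, 8192, 65536, 1 << 20 };

        for (size_t i = 0; i < sizeof(nbuckets) / sizeof(nbuckets[0]); i++) {
                size_t reserve = clamp_size(nbuckets[i] >> 7, 32,
                                            SB_JOURNAL_BUCKETS);
                printf("nbuckets=%zu -> RESERVE_BTREE buckets=%zu\n",
                       nbuckets[i], reserve);
        }
        return 0;
}

For a small cache this raises the floor from the old 8 buckets to 32, while large caches are capped at 256 rather than scaling with nbuckets — enough headroom for the journal-replay splits described in the new comment, without reserving an unbounded share of the disk.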
