summaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r--block/Makefile4
-rw-r--r--block/bdev.c36
-rw-r--r--block/bfq-cgroup.c12
-rw-r--r--block/bio.c44
-rw-r--r--block/blk-cgroup.c10
-rw-r--r--block/blk-core.c113
-rw-r--r--block/blk-crypto-fallback.c118
-rw-r--r--block/blk-crypto-profile.c565
-rw-r--r--block/blk-crypto.c29
-rw-r--r--block/blk-flush.c12
-rw-r--r--block/blk-ia-ranges.c348
-rw-r--r--block/blk-integrity.c4
-rw-r--r--block/blk-merge.c23
-rw-r--r--block/blk-mq-debugfs.c4
-rw-r--r--block/blk-mq-sched.c19
-rw-r--r--block/blk-mq-sched.h12
-rw-r--r--block/blk-mq-tag.c7
-rw-r--r--block/blk-mq-tag.h23
-rw-r--r--block/blk-mq.c502
-rw-r--r--block/blk-mq.h36
-rw-r--r--block/blk-settings.c20
-rw-r--r--block/blk-sysfs.c26
-rw-r--r--block/blk-wbt.c3
-rw-r--r--block/blk.h43
-rw-r--r--block/bsg-lib.c32
-rw-r--r--block/fops.c196
-rw-r--r--block/genhd.c57
-rw-r--r--block/ioctl.c20
-rw-r--r--block/keyslot-manager.c579
-rw-r--r--block/partitions/core.c2
-rw-r--r--block/partitions/efi.c2
-rw-r--r--block/partitions/ibm.c19
32 files changed, 1754 insertions, 1166 deletions
diff --git a/block/Makefile b/block/Makefile
index 74df168729ec..44df57e562bf 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -9,7 +9,7 @@ obj-y := bdev.o fops.o bio.o elevator.o blk-core.o blk-sysfs.o \
blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \
genhd.o ioprio.o badblocks.o partitions/ blk-rq-qos.o \
- disk-events.o
+ disk-events.o blk-ia-ranges.o
obj-$(CONFIG_BOUNCE) += bounce.o
obj-$(CONFIG_BLK_DEV_BSG_COMMON) += bsg.o
@@ -36,6 +36,6 @@ obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o
obj-$(CONFIG_BLK_DEBUG_FS_ZONED)+= blk-mq-debugfs-zoned.o
obj-$(CONFIG_BLK_SED_OPAL) += sed-opal.o
obj-$(CONFIG_BLK_PM) += blk-pm.o
-obj-$(CONFIG_BLK_INLINE_ENCRYPTION) += keyslot-manager.o blk-crypto.o
+obj-$(CONFIG_BLK_INLINE_ENCRYPTION) += blk-crypto.o blk-crypto-profile.o
obj-$(CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK) += blk-crypto-fallback.o
obj-$(CONFIG_BLOCK_HOLDER_DEPRECATED) += holder.o
diff --git a/block/bdev.c b/block/bdev.c
index cff0bb3a4578..b4dab2fb6a74 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -185,14 +185,13 @@ int sb_min_blocksize(struct super_block *sb, int size)
EXPORT_SYMBOL(sb_min_blocksize);
-int __sync_blockdev(struct block_device *bdev, int wait)
+int sync_blockdev_nowait(struct block_device *bdev)
{
if (!bdev)
return 0;
- if (!wait)
- return filemap_flush(bdev->bd_inode->i_mapping);
- return filemap_write_and_wait(bdev->bd_inode->i_mapping);
+ return filemap_flush(bdev->bd_inode->i_mapping);
}
+EXPORT_SYMBOL_GPL(sync_blockdev_nowait);
/*
* Write out and wait upon all the dirty data associated with a block
@@ -200,7 +199,9 @@ int __sync_blockdev(struct block_device *bdev, int wait)
*/
int sync_blockdev(struct block_device *bdev)
{
- return __sync_blockdev(bdev, 1);
+ if (!bdev)
+ return 0;
+ return filemap_write_and_wait(bdev->bd_inode->i_mapping);
}
EXPORT_SYMBOL(sync_blockdev);
@@ -964,9 +965,11 @@ EXPORT_SYMBOL(blkdev_put);
* @pathname: special file representing the block device
* @dev: return value of the block device's dev_t
*
- * Get a reference to the blockdevice at @pathname in the current
- * namespace if possible and return it. Return ERR_PTR(error)
- * otherwise.
+ * Lookup the block device's dev_t at @pathname in the current
+ * namespace if possible and return it by @dev.
+ *
+ * RETURNS:
+ * 0 if succeeded, errno otherwise.
*/
int lookup_bdev(const char *pathname, dev_t *dev)
{
@@ -1018,7 +1021,7 @@ int __invalidate_device(struct block_device *bdev, bool kill_dirty)
}
EXPORT_SYMBOL(__invalidate_device);
-void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
+void sync_bdevs(bool wait)
{
struct inode *inode, *old_inode = NULL;
@@ -1049,8 +1052,19 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
bdev = I_BDEV(inode);
mutex_lock(&bdev->bd_disk->open_mutex);
- if (bdev->bd_openers)
- func(bdev, arg);
+ if (!bdev->bd_openers) {
+ ; /* skip */
+ } else if (wait) {
+ /*
+ * We keep the error status of individual mapping so
+ * that applications can catch the writeback error using
+ * fsync(2). See filemap_fdatawait_keep_errors() for
+ * details.
+ */
+ filemap_fdatawait_keep_errors(inode->i_mapping);
+ } else {
+ filemap_fdatawrite(inode->i_mapping);
+ }
mutex_unlock(&bdev->bd_disk->open_mutex);
spin_lock(&blockdev_superblock->s_inode_list_lock);
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index 882ec4bc51ad..24a5c5329bcd 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -463,7 +463,7 @@ static int bfqg_stats_init(struct bfqg_stats *stats, gfp_t gfp)
{
if (blkg_rwstat_init(&stats->bytes, gfp) ||
blkg_rwstat_init(&stats->ios, gfp))
- return -ENOMEM;
+ goto error;
#ifdef CONFIG_BFQ_CGROUP_DEBUG
if (blkg_rwstat_init(&stats->merged, gfp) ||
@@ -476,13 +476,15 @@ static int bfqg_stats_init(struct bfqg_stats *stats, gfp_t gfp)
bfq_stat_init(&stats->dequeue, gfp) ||
bfq_stat_init(&stats->group_wait_time, gfp) ||
bfq_stat_init(&stats->idle_time, gfp) ||
- bfq_stat_init(&stats->empty_time, gfp)) {
- bfqg_stats_exit(stats);
- return -ENOMEM;
- }
+ bfq_stat_init(&stats->empty_time, gfp))
+ goto error;
#endif
return 0;
+
+error:
+ bfqg_stats_exit(stats);
+ return -ENOMEM;
}
static struct bfq_group_data *cpd_to_bfqgd(struct blkcg_policy_data *cpd)
diff --git a/block/bio.c b/block/bio.c
index 4f397ba47db5..15ab0d6d1c06 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1033,52 +1033,40 @@ int bio_add_page(struct bio *bio, struct page *page,
}
EXPORT_SYMBOL(bio_add_page);
-void bio_release_pages(struct bio *bio, bool mark_dirty)
+void __bio_release_pages(struct bio *bio, bool mark_dirty)
{
struct bvec_iter_all iter_all;
struct bio_vec *bvec;
- if (bio_flagged(bio, BIO_NO_PAGE_REF))
- return;
-
bio_for_each_segment_all(bvec, bio, iter_all) {
if (mark_dirty && !PageCompound(bvec->bv_page))
set_page_dirty_lock(bvec->bv_page);
put_page(bvec->bv_page);
}
}
-EXPORT_SYMBOL_GPL(bio_release_pages);
+EXPORT_SYMBOL_GPL(__bio_release_pages);
-static void __bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter)
+void bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter)
{
+ size_t size = iov_iter_count(iter);
+
WARN_ON_ONCE(bio->bi_max_vecs);
+ if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
+ struct request_queue *q = bdev_get_queue(bio->bi_bdev);
+ size_t max_sectors = queue_max_zone_append_sectors(q);
+
+ size = min(size, max_sectors << SECTOR_SHIFT);
+ }
+
bio->bi_vcnt = iter->nr_segs;
bio->bi_io_vec = (struct bio_vec *)iter->bvec;
bio->bi_iter.bi_bvec_done = iter->iov_offset;
- bio->bi_iter.bi_size = iter->count;
+ bio->bi_iter.bi_size = size;
bio_set_flag(bio, BIO_NO_PAGE_REF);
bio_set_flag(bio, BIO_CLONED);
}
-static int bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter)
-{
- __bio_iov_bvec_set(bio, iter);
- iov_iter_advance(iter, iter->count);
- return 0;
-}
-
-static int bio_iov_bvec_set_append(struct bio *bio, struct iov_iter *iter)
-{
- struct request_queue *q = bdev_get_queue(bio->bi_bdev);
- struct iov_iter i = *iter;
-
- iov_iter_truncate(&i, queue_max_zone_append_sectors(q) << 9);
- __bio_iov_bvec_set(bio, &i);
- iov_iter_advance(iter, i.count);
- return 0;
-}
-
static void bio_put_pages(struct page **pages, size_t size, size_t off)
{
size_t i, nr = DIV_ROUND_UP(size + (off & ~PAGE_MASK), PAGE_SIZE);
@@ -1220,9 +1208,9 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
int ret = 0;
if (iov_iter_is_bvec(iter)) {
- if (bio_op(bio) == REQ_OP_ZONE_APPEND)
- return bio_iov_bvec_set_append(bio, iter);
- return bio_iov_bvec_set(bio, iter);
+ bio_iov_bvec_set(bio, iter);
+ iov_iter_advance(iter, bio->bi_iter.bi_size);
+ return 0;
}
do {
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 8908298d6ad3..88b1fce90520 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -634,6 +634,14 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
q = bdev_get_queue(bdev);
+ /*
+ * blkcg_deactivate_policy() requires queue to be frozen, we can grab
+ * q_usage_counter to prevent concurrent with blkcg_deactivate_policy().
+ */
+ ret = blk_queue_enter(q, 0);
+ if (ret)
+ return ret;
+
rcu_read_lock();
spin_lock_irq(&q->queue_lock);
@@ -703,6 +711,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
goto success;
}
success:
+ blk_queue_exit(q);
ctx->bdev = bdev;
ctx->blkg = blkg;
ctx->body = input;
@@ -715,6 +724,7 @@ fail_unlock:
rcu_read_unlock();
fail:
blkdev_put_no_open(bdev);
+ blk_queue_exit(q);
/*
* If queue was bypassing, we should retry. Do so after a
* short msleep(). It isn't strictly necessary but queue
diff --git a/block/blk-core.c b/block/blk-core.c
index d0c2e11411d0..b043de2baaac 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -386,30 +386,6 @@ void blk_cleanup_queue(struct request_queue *q)
}
EXPORT_SYMBOL(blk_cleanup_queue);
-static bool blk_try_enter_queue(struct request_queue *q, bool pm)
-{
- rcu_read_lock();
- if (!percpu_ref_tryget_live(&q->q_usage_counter))
- goto fail;
-
- /*
- * The code that increments the pm_only counter must ensure that the
- * counter is globally visible before the queue is unfrozen.
- */
- if (blk_queue_pm_only(q) &&
- (!pm || queue_rpm_status(q) == RPM_SUSPENDED))
- goto fail_put;
-
- rcu_read_unlock();
- return true;
-
-fail_put:
- percpu_ref_put(&q->q_usage_counter);
-fail:
- rcu_read_unlock();
- return false;
-}
-
/**
* blk_queue_enter() - try to increase q->q_usage_counter
* @q: request queue pointer
@@ -442,10 +418,8 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
return 0;
}
-static inline int bio_queue_enter(struct bio *bio)
+int __bio_queue_enter(struct request_queue *q, struct bio *bio)
{
- struct request_queue *q = bdev_get_queue(bio->bi_bdev);
-
while (!blk_try_enter_queue(q, false)) {
struct gendisk *disk = bio->bi_bdev->bd_disk;
@@ -597,34 +571,6 @@ bool blk_get_queue(struct request_queue *q)
}
EXPORT_SYMBOL(blk_get_queue);
-/**
- * blk_get_request - allocate a request
- * @q: request queue to allocate a request for
- * @op: operation (REQ_OP_*) and REQ_* flags, e.g. REQ_SYNC.
- * @flags: BLK_MQ_REQ_* flags, e.g. BLK_MQ_REQ_NOWAIT.
- */
-struct request *blk_get_request(struct request_queue *q, unsigned int op,
- blk_mq_req_flags_t flags)
-{
- struct request *req;
-
- WARN_ON_ONCE(op & REQ_NOWAIT);
- WARN_ON_ONCE(flags & ~(BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_PM));
-
- req = blk_mq_alloc_request(q, op, flags);
- if (!IS_ERR(req) && q->mq_ops->initialize_rq_fn)
- q->mq_ops->initialize_rq_fn(req);
-
- return req;
-}
-EXPORT_SYMBOL(blk_get_request);
-
-void blk_put_request(struct request *req)
-{
- blk_mq_free_request(req);
-}
-EXPORT_SYMBOL(blk_put_request);
-
static void handle_bad_sector(struct bio *bio, sector_t maxsector)
{
char b[BDEVNAME_SIZE];
@@ -770,7 +716,7 @@ static inline blk_status_t blk_check_zone_append(struct request_queue *q,
return BLK_STS_OK;
}
-static noinline_for_stack bool submit_bio_checks(struct bio *bio)
+noinline_for_stack bool submit_bio_checks(struct bio *bio)
{
struct block_device *bdev = bio->bi_bdev;
struct request_queue *q = bdev_get_queue(bdev);
@@ -888,22 +834,23 @@ end_io:
return false;
}
-static void __submit_bio(struct bio *bio)
+static void __submit_bio_fops(struct gendisk *disk, struct bio *bio)
{
- struct gendisk *disk = bio->bi_bdev->bd_disk;
-
if (unlikely(bio_queue_enter(bio) != 0))
return;
+ if (submit_bio_checks(bio) && blk_crypto_bio_prep(&bio))
+ disk->fops->submit_bio(bio);
+ blk_queue_exit(disk->queue);
+}
+
+static void __submit_bio(struct bio *bio)
+{
+ struct gendisk *disk = bio->bi_bdev->bd_disk;
- if (!submit_bio_checks(bio) || !blk_crypto_bio_prep(&bio))
- goto queue_exit;
- if (!disk->fops->submit_bio) {
+ if (!disk->fops->submit_bio)
blk_mq_submit_bio(bio);
- return;
- }
- disk->fops->submit_bio(bio);
-queue_exit:
- blk_queue_exit(disk->queue);
+ else
+ __submit_bio_fops(disk, bio);
}
/*
@@ -1080,7 +1027,7 @@ EXPORT_SYMBOL(submit_bio);
*/
int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags)
{
- struct request_queue *q = bio->bi_bdev->bd_disk->queue;
+ struct request_queue *q = bdev_get_queue(bio->bi_bdev);
blk_qc_t cookie = READ_ONCE(bio->bi_cookie);
int ret;
@@ -1089,7 +1036,7 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags)
return 0;
if (current->plug)
- blk_flush_plug_list(current->plug, false);
+ blk_flush_plug(current->plug, false);
if (blk_queue_enter(q, BLK_MQ_REQ_NOWAIT))
return 0;
@@ -1550,11 +1497,12 @@ void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios)
if (tsk->plug)
return;
- INIT_LIST_HEAD(&plug->mq_list);
+ plug->mq_list = NULL;
plug->cached_rq = NULL;
plug->nr_ios = min_t(unsigned short, nr_ios, BLK_MAX_REQUEST_COUNT);
plug->rq_count = 0;
plug->multiple_queues = false;
+ plug->has_elevator = false;
plug->nowait = false;
INIT_LIST_HEAD(&plug->cb_list);
@@ -1636,13 +1584,19 @@ struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug, void *data,
}
EXPORT_SYMBOL(blk_check_plugged);
-void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
+void blk_flush_plug(struct blk_plug *plug, bool from_schedule)
{
- flush_plug_callbacks(plug, from_schedule);
-
- if (!list_empty(&plug->mq_list))
+ if (!list_empty(&plug->cb_list))
+ flush_plug_callbacks(plug, from_schedule);
+ if (!rq_list_empty(plug->mq_list))
blk_mq_flush_plug_list(plug, from_schedule);
- if (unlikely(!from_schedule && plug->cached_rq))
+ /*
+ * Unconditionally flush out cached requests, even if the unplug
+ * event came from schedule. Since we know hold references to the
+ * queue for cached requests, we don't want a blocked task holding
+ * up a queue freeze/quiesce event.
+ */
+ if (unlikely(!rq_list_empty(plug->cached_rq)))
blk_mq_free_plug_rqs(plug);
}
@@ -1658,11 +1612,10 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
*/
void blk_finish_plug(struct blk_plug *plug)
{
- if (plug != current->plug)
- return;
- blk_flush_plug_list(plug, false);
-
- current->plug = NULL;
+ if (plug == current->plug) {
+ blk_flush_plug(plug, false);
+ current->plug = NULL;
+ }
}
EXPORT_SYMBOL(blk_finish_plug);
diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c
index ec4c7823541c..c87aba8584c6 100644
--- a/block/blk-crypto-fallback.c
+++ b/block/blk-crypto-fallback.c
@@ -12,9 +12,9 @@
#include <crypto/skcipher.h>
#include <linux/blk-cgroup.h>
#include <linux/blk-crypto.h>
+#include <linux/blk-crypto-profile.h>
#include <linux/blkdev.h>
#include <linux/crypto.h>
-#include <linux/keyslot-manager.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/random.h>
@@ -73,12 +73,12 @@ static mempool_t *bio_fallback_crypt_ctx_pool;
static DEFINE_MUTEX(tfms_init_lock);
static bool tfms_inited[BLK_ENCRYPTION_MODE_MAX];
-static struct blk_crypto_keyslot {
+static struct blk_crypto_fallback_keyslot {
enum blk_crypto_mode_num crypto_mode;
struct crypto_skcipher *tfms[BLK_ENCRYPTION_MODE_MAX];
} *blk_crypto_keyslots;
-static struct blk_keyslot_manager blk_crypto_ksm;
+static struct blk_crypto_profile blk_crypto_fallback_profile;
static struct workqueue_struct *blk_crypto_wq;
static mempool_t *blk_crypto_bounce_page_pool;
static struct bio_set crypto_bio_split;
@@ -89,9 +89,9 @@ static struct bio_set crypto_bio_split;
*/
static u8 blank_key[BLK_CRYPTO_MAX_KEY_SIZE];
-static void blk_crypto_evict_keyslot(unsigned int slot)
+static void blk_crypto_fallback_evict_keyslot(unsigned int slot)
{
- struct blk_crypto_keyslot *slotp = &blk_crypto_keyslots[slot];
+ struct blk_crypto_fallback_keyslot *slotp = &blk_crypto_keyslots[slot];
enum blk_crypto_mode_num crypto_mode = slotp->crypto_mode;
int err;
@@ -104,45 +104,41 @@ static void blk_crypto_evict_keyslot(unsigned int slot)
slotp->crypto_mode = BLK_ENCRYPTION_MODE_INVALID;
}
-static int blk_crypto_keyslot_program(struct blk_keyslot_manager *ksm,
- const struct blk_crypto_key *key,
- unsigned int slot)
+static int
+blk_crypto_fallback_keyslot_program(struct blk_crypto_profile *profile,
+ const struct blk_crypto_key *key,
+ unsigned int slot)
{
- struct blk_crypto_keyslot *slotp = &blk_crypto_keyslots[slot];
+ struct blk_crypto_fallback_keyslot *slotp = &blk_crypto_keyslots[slot];
const enum blk_crypto_mode_num crypto_mode =
key->crypto_cfg.crypto_mode;
int err;
if (crypto_mode != slotp->crypto_mode &&
slotp->crypto_mode != BLK_ENCRYPTION_MODE_INVALID)
- blk_crypto_evict_keyslot(slot);
+ blk_crypto_fallback_evict_keyslot(slot);
slotp->crypto_mode = crypto_mode;
err = crypto_skcipher_setkey(slotp->tfms[crypto_mode], key->raw,
key->size);
if (err) {
- blk_crypto_evict_keyslot(slot);
+ blk_crypto_fallback_evict_keyslot(slot);
return err;
}
return 0;
}
-static int blk_crypto_keyslot_evict(struct blk_keyslot_manager *ksm,
- const struct blk_crypto_key *key,
- unsigned int slot)
+static int blk_crypto_fallback_keyslot_evict(struct blk_crypto_profile *profile,
+ const struct blk_crypto_key *key,
+ unsigned int slot)
{
- blk_crypto_evict_keyslot(slot);
+ blk_crypto_fallback_evict_keyslot(slot);
return 0;
}
-/*
- * The crypto API fallback KSM ops - only used for a bio when it specifies a
- * blk_crypto_key that was not supported by the device's inline encryption
- * hardware.
- */
-static const struct blk_ksm_ll_ops blk_crypto_ksm_ll_ops = {
- .keyslot_program = blk_crypto_keyslot_program,
- .keyslot_evict = blk_crypto_keyslot_evict,
+static const struct blk_crypto_ll_ops blk_crypto_fallback_ll_ops = {
+ .keyslot_program = blk_crypto_fallback_keyslot_program,
+ .keyslot_evict = blk_crypto_fallback_keyslot_evict,
};
static void blk_crypto_fallback_encrypt_endio(struct bio *enc_bio)
@@ -160,7 +156,7 @@ static void blk_crypto_fallback_encrypt_endio(struct bio *enc_bio)
bio_endio(src_bio);
}
-static struct bio *blk_crypto_clone_bio(struct bio *bio_src)
+static struct bio *blk_crypto_fallback_clone_bio(struct bio *bio_src)
{
struct bvec_iter iter;
struct bio_vec bv;
@@ -187,13 +183,14 @@ static struct bio *blk_crypto_clone_bio(struct bio *bio_src)
return bio;
}
-static bool blk_crypto_alloc_cipher_req(struct blk_ksm_keyslot *slot,
- struct skcipher_request **ciph_req_ret,
- struct crypto_wait *wait)
+static bool
+blk_crypto_fallback_alloc_cipher_req(struct blk_crypto_keyslot *slot,
+ struct skcipher_request **ciph_req_ret,
+ struct crypto_wait *wait)
{
struct skcipher_request *ciph_req;
- const struct blk_crypto_keyslot *slotp;
- int keyslot_idx = blk_ksm_get_slot_idx(slot);
+ const struct blk_crypto_fallback_keyslot *slotp;
+ int keyslot_idx = blk_crypto_keyslot_index(slot);
slotp = &blk_crypto_keyslots[keyslot_idx];
ciph_req = skcipher_request_alloc(slotp->tfms[slotp->crypto_mode],
@@ -210,7 +207,7 @@ static bool blk_crypto_alloc_cipher_req(struct blk_ksm_keyslot *slot,
return true;
}
-static bool blk_crypto_split_bio_if_needed(struct bio **bio_ptr)
+static bool blk_crypto_fallback_split_bio_if_needed(struct bio **bio_ptr)
{
struct bio *bio = *bio_ptr;
unsigned int i = 0;
@@ -265,7 +262,7 @@ static bool blk_crypto_fallback_encrypt_bio(struct bio **bio_ptr)
{
struct bio *src_bio, *enc_bio;
struct bio_crypt_ctx *bc;
- struct blk_ksm_keyslot *slot;
+ struct blk_crypto_keyslot *slot;
int data_unit_size;
struct skcipher_request *ciph_req = NULL;
DECLARE_CRYPTO_WAIT(wait);
@@ -277,7 +274,7 @@ static bool blk_crypto_fallback_encrypt_bio(struct bio **bio_ptr)
blk_status_t blk_st;
/* Split the bio if it's too big for single page bvec */
- if (!blk_crypto_split_bio_if_needed(bio_ptr))
+ if (!blk_crypto_fallback_split_bio_if_needed(bio_ptr))
return false;
src_bio = *bio_ptr;
@@ -285,24 +282,25 @@ static bool blk_crypto_fallback_encrypt_bio(struct bio **bio_ptr)
data_unit_size = bc->bc_key->crypto_cfg.data_unit_size;
/* Allocate bounce bio for encryption */
- enc_bio = blk_crypto_clone_bio(src_bio);
+ enc_bio = blk_crypto_fallback_clone_bio(src_bio);
if (!enc_bio) {
src_bio->bi_status = BLK_STS_RESOURCE;
return false;
}
/*
- * Use the crypto API fallback keyslot manager to get a crypto_skcipher
- * for the algorithm and key specified for this bio.
+ * Get a blk-crypto-fallback keyslot that contains a crypto_skcipher for
+ * this bio's algorithm and key.
*/
- blk_st = blk_ksm_get_slot_for_key(&blk_crypto_ksm, bc->bc_key, &slot);
+ blk_st = blk_crypto_get_keyslot(&blk_crypto_fallback_profile,
+ bc->bc_key, &slot);
if (blk_st != BLK_STS_OK) {
src_bio->bi_status = blk_st;
goto out_put_enc_bio;
}
/* and then allocate an skcipher_request for it */
- if (!blk_crypto_alloc_cipher_req(slot, &ciph_req, &wait)) {
+ if (!blk_crypto_fallback_alloc_cipher_req(slot, &ciph_req, &wait)) {
src_bio->bi_status = BLK_STS_RESOURCE;
goto out_release_keyslot;
}
@@ -363,7 +361,7 @@ out_free_bounce_pages:
out_free_ciph_req:
skcipher_request_free(ciph_req);
out_release_keyslot:
- blk_ksm_put_slot(slot);
+ blk_crypto_put_keyslot(slot);
out_put_enc_bio:
if (enc_bio)
bio_put(enc_bio);
@@ -381,7 +379,7 @@ static void blk_crypto_fallback_decrypt_bio(struct work_struct *work)
container_of(work, struct bio_fallback_crypt_ctx, work);
struct bio *bio = f_ctx->bio;
struct bio_crypt_ctx *bc = &f_ctx->crypt_ctx;
- struct blk_ksm_keyslot *slot;
+ struct blk_crypto_keyslot *slot;
struct skcipher_request *ciph_req = NULL;
DECLARE_CRYPTO_WAIT(wait);
u64 curr_dun[BLK_CRYPTO_DUN_ARRAY_SIZE];
@@ -394,17 +392,18 @@ static void blk_crypto_fallback_decrypt_bio(struct work_struct *work)
blk_status_t blk_st;
/*
- * Use the crypto API fallback keyslot manager to get a crypto_skcipher
- * for the algorithm and key specified for this bio.
+ * Get a blk-crypto-fallback keyslot that contains a crypto_skcipher for
+ * this bio's algorithm and key.
*/
- blk_st = blk_ksm_get_slot_for_key(&blk_crypto_ksm, bc->bc_key, &slot);
+ blk_st = blk_crypto_get_keyslot(&blk_crypto_fallback_profile,
+ bc->bc_key, &slot);
if (blk_st != BLK_STS_OK) {
bio->bi_status = blk_st;
goto out_no_keyslot;
}
/* and then allocate an skcipher_request for it */
- if (!blk_crypto_alloc_cipher_req(slot, &ciph_req, &wait)) {
+ if (!blk_crypto_fallback_alloc_cipher_req(slot, &ciph_req, &wait)) {
bio->bi_status = BLK_STS_RESOURCE;
goto out;
}
@@ -435,7 +434,7 @@ static void blk_crypto_fallback_decrypt_bio(struct work_struct *work)
out:
skcipher_request_free(ciph_req);
- blk_ksm_put_slot(slot);
+ blk_crypto_put_keyslot(slot);
out_no_keyslot:
mempool_free(f_ctx, bio_fallback_crypt_ctx_pool);
bio_endio(bio);
@@ -474,9 +473,9 @@ static void blk_crypto_fallback_decrypt_endio(struct bio *bio)
* @bio_ptr: pointer to the bio to prepare
*
* If bio is doing a WRITE operation, this splits the bio into two parts if it's
- * too big (see blk_crypto_split_bio_if_needed). It then allocates a bounce bio
- * for the first part, encrypts it, and update bio_ptr to point to the bounce
- * bio.
+ * too big (see blk_crypto_fallback_split_bio_if_needed()). It then allocates a
+ * bounce bio for the first part, encrypts it, and updates bio_ptr to point to
+ * the bounce bio.
*
* For a READ operation, we mark the bio for decryption by using bi_private and
* bi_end_io.
@@ -500,8 +499,8 @@ bool blk_crypto_fallback_bio_prep(struct bio **bio_ptr)
return false;
}
- if (!blk_ksm_crypto_cfg_supported(&blk_crypto_ksm,
- &bc->bc_key->crypto_cfg)) {
+ if (!__blk_crypto_cfg_supported(&blk_crypto_fallback_profile,
+ &bc->bc_key->crypto_cfg)) {
bio->bi_status = BLK_STS_NOTSUPP;
return false;
}
@@ -527,7 +526,7 @@ bool blk_crypto_fallback_bio_prep(struct bio **bio_ptr)
int blk_crypto_fallback_evict_key(const struct blk_crypto_key *key)
{
- return blk_ksm_evict_key(&blk_crypto_ksm, key);
+ return __blk_crypto_evict_key(&blk_crypto_fallback_profile, key);
}
static bool blk_crypto_fallback_inited;
@@ -535,6 +534,7 @@ static int blk_crypto_fallback_init(void)
{
int i;
int err;
+ struct blk_crypto_profile *profile = &blk_crypto_fallback_profile;
if (blk_crypto_fallback_inited)
return 0;
@@ -545,24 +545,24 @@ static int blk_crypto_fallback_init(void)
if (err)
goto out;
- err = blk_ksm_init(&blk_crypto_ksm, blk_crypto_num_keyslots);
+ err = blk_crypto_profile_init(profile, blk_crypto_num_keyslots);
if (err)
goto fail_free_bioset;
err = -ENOMEM;
- blk_crypto_ksm.ksm_ll_ops = blk_crypto_ksm_ll_ops;
- blk_crypto_ksm.max_dun_bytes_supported = BLK_CRYPTO_MAX_IV_SIZE;
+ profile->ll_ops = blk_crypto_fallback_ll_ops;
+ profile->max_dun_bytes_supported = BLK_CRYPTO_MAX_IV_SIZE;
/* All blk-crypto modes have a crypto API fallback. */
for (i = 0; i < BLK_ENCRYPTION_MODE_MAX; i++)
- blk_crypto_ksm.crypto_modes_supported[i] = 0xFFFFFFFF;
- blk_crypto_ksm.crypto_modes_supported[BLK_ENCRYPTION_MODE_INVALID] = 0;
+ profile->modes_supported[i] = 0xFFFFFFFF;
+ profile->modes_supported[BLK_ENCRYPTION_MODE_INVALID] = 0;
blk_crypto_wq = alloc_workqueue("blk_crypto_wq",
WQ_UNBOUND | WQ_HIGHPRI |
WQ_MEM_RECLAIM, num_online_cpus());
if (!blk_crypto_wq)
- goto fail_free_ksm;
+ goto fail_destroy_profile;
blk_crypto_keyslots = kcalloc(blk_crypto_num_keyslots,
sizeof(blk_crypto_keyslots[0]),
@@ -596,8 +596,8 @@ fail_free_keyslots:
kfree(blk_crypto_keyslots);
fail_free_wq:
destroy_workqueue(blk_crypto_wq);
-fail_free_ksm:
- blk_ksm_destroy(&blk_crypto_ksm);
+fail_destroy_profile:
+ blk_crypto_profile_destroy(profile);
fail_free_bioset:
bioset_exit(&crypto_bio_split);
out:
@@ -611,7 +611,7 @@ out:
int blk_crypto_fallback_start_using_mode(enum blk_crypto_mode_num mode_num)
{
const char *cipher_str = blk_crypto_modes[mode_num].cipher_str;
- struct blk_crypto_keyslot *slotp;
+ struct blk_crypto_fallback_keyslot *slotp;
unsigned int i;
int err = 0;
diff --git a/block/blk-crypto-profile.c b/block/blk-crypto-profile.c
new file mode 100644
index 000000000000..605ba0626a5c
--- /dev/null
+++ b/block/blk-crypto-profile.c
@@ -0,0 +1,565 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2019 Google LLC
+ */
+
+/**
+ * DOC: blk-crypto profiles
+ *
+ * 'struct blk_crypto_profile' contains all generic inline encryption-related
+ * state for a particular inline encryption device. blk_crypto_profile serves
+ * as the way that drivers for inline encryption hardware expose their crypto
+ * capabilities and certain functions (e.g., functions to program and evict
+ * keys) to upper layers. Device drivers that want to support inline encryption
+ * construct a crypto profile, then associate it with the disk's request_queue.
+ *
+ * If the device has keyslots, then its blk_crypto_profile also handles managing
+ * these keyslots in a device-independent way, using the driver-provided
+ * functions to program and evict keys as needed. This includes keeping track
+ * of which key and how many I/O requests are using each keyslot, getting
+ * keyslots for I/O requests, and handling key eviction requests.
+ *
+ * For more information, see Documentation/block/inline-encryption.rst.
+ */
+
+#define pr_fmt(fmt) "blk-crypto: " fmt
+
+#include <linux/blk-crypto-profile.h>
+#include <linux/device.h>
+#include <linux/atomic.h>
+#include <linux/mutex.h>
+#include <linux/pm_runtime.h>
+#include <linux/wait.h>
+#include <linux/blkdev.h>
+#include <linux/blk-integrity.h>
+
+struct blk_crypto_keyslot {
+ atomic_t slot_refs;
+ struct list_head idle_slot_node;
+ struct hlist_node hash_node;
+ const struct blk_crypto_key *key;
+ struct blk_crypto_profile *profile;
+};
+
+static inline void blk_crypto_hw_enter(struct blk_crypto_profile *profile)
+{
+ /*
+ * Calling into the driver requires profile->lock held and the device
+ * resumed. But we must resume the device first, since that can acquire
+ * and release profile->lock via blk_crypto_reprogram_all_keys().
+ */
+ if (profile->dev)
+ pm_runtime_get_sync(profile->dev);
+ down_write(&profile->lock);
+}
+
+static inline void blk_crypto_hw_exit(struct blk_crypto_profile *profile)
+{
+ up_write(&profile->lock);
+ if (profile->dev)
+ pm_runtime_put_sync(profile->dev);
+}
+
+/**
+ * blk_crypto_profile_init() - Initialize a blk_crypto_profile
+ * @profile: the blk_crypto_profile to initialize
+ * @num_slots: the number of keyslots
+ *
+ * Storage drivers must call this when starting to set up a blk_crypto_profile,
+ * before filling in additional fields.
+ *
+ * Return: 0 on success, or else a negative error code.
+ */
+int blk_crypto_profile_init(struct blk_crypto_profile *profile,
+ unsigned int num_slots)
+{
+ unsigned int slot;
+ unsigned int i;
+ unsigned int slot_hashtable_size;
+
+ memset(profile, 0, sizeof(*profile));
+ init_rwsem(&profile->lock);
+
+ if (num_slots == 0)
+ return 0;
+
+ /* Initialize keyslot management data. */
+
+ profile->slots = kvcalloc(num_slots, sizeof(profile->slots[0]),
+ GFP_KERNEL);
+ if (!profile->slots)
+ return -ENOMEM;
+
+ profile->num_slots = num_slots;
+
+ init_waitqueue_head(&profile->idle_slots_wait_queue);
+ INIT_LIST_HEAD(&profile->idle_slots);
+
+ for (slot = 0; slot < num_slots; slot++) {
+ profile->slots[slot].profile = profile;
+ list_add_tail(&profile->slots[slot].idle_slot_node,
+ &profile->idle_slots);
+ }
+
+ spin_lock_init(&profile->idle_slots_lock);
+
+ slot_hashtable_size = roundup_pow_of_two(num_slots);
+ /*
+ * hash_ptr() assumes bits != 0, so ensure the hash table has at least 2
+ * buckets. This only makes a difference when there is only 1 keyslot.
+ */
+ if (slot_hashtable_size < 2)
+ slot_hashtable_size = 2;
+
+ profile->log_slot_ht_size = ilog2(slot_hashtable_size);
+ profile->slot_hashtable =
+ kvmalloc_array(slot_hashtable_size,
+ sizeof(profile->slot_hashtable[0]), GFP_KERNEL);
+ if (!profile->slot_hashtable)
+ goto err_destroy;
+ for (i = 0; i < slot_hashtable_size; i++)
+ INIT_HLIST_HEAD(&profile->slot_hashtable[i]);
+
+ return 0;
+
+err_destroy:
+ blk_crypto_profile_destroy(profile);
+ return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(blk_crypto_profile_init);
+
+static void blk_crypto_profile_destroy_callback(void *profile)
+{
+ blk_crypto_profile_destroy(profile);
+}
+
+/**
+ * devm_blk_crypto_profile_init() - Resource-managed blk_crypto_profile_init()
+ * @dev: the device which owns the blk_crypto_profile
+ * @profile: the blk_crypto_profile to initialize
+ * @num_slots: the number of keyslots
+ *
+ * Like blk_crypto_profile_init(), but causes blk_crypto_profile_destroy() to be
+ * called automatically on driver detach.
+ *
+ * Return: 0 on success, or else a negative error code.
+ */
+int devm_blk_crypto_profile_init(struct device *dev,
+ struct blk_crypto_profile *profile,
+ unsigned int num_slots)
+{
+ int err = blk_crypto_profile_init(profile, num_slots);
+
+ if (err)
+ return err;
+
+ return devm_add_action_or_reset(dev,
+ blk_crypto_profile_destroy_callback,
+ profile);
+}
+EXPORT_SYMBOL_GPL(devm_blk_crypto_profile_init);
+
+static inline struct hlist_head *
+blk_crypto_hash_bucket_for_key(struct blk_crypto_profile *profile,
+ const struct blk_crypto_key *key)
+{
+ return &profile->slot_hashtable[
+ hash_ptr(key, profile->log_slot_ht_size)];
+}
+
+static void
+blk_crypto_remove_slot_from_lru_list(struct blk_crypto_keyslot *slot)
+{
+ struct blk_crypto_profile *profile = slot->profile;
+ unsigned long flags;
+
+ spin_lock_irqsave(&profile->idle_slots_lock, flags);
+ list_del(&slot->idle_slot_node);
+ spin_unlock_irqrestore(&profile->idle_slots_lock, flags);
+}
+
+static struct blk_crypto_keyslot *
+blk_crypto_find_keyslot(struct blk_crypto_profile *profile,
+ const struct blk_crypto_key *key)
+{
+ const struct hlist_head *head =
+ blk_crypto_hash_bucket_for_key(profile, key);
+ struct blk_crypto_keyslot *slotp;
+
+ hlist_for_each_entry(slotp, head, hash_node) {
+ if (slotp->key == key)
+ return slotp;
+ }
+ return NULL;
+}
+
+static struct blk_crypto_keyslot *
+blk_crypto_find_and_grab_keyslot(struct blk_crypto_profile *profile,
+ const struct blk_crypto_key *key)
+{
+ struct blk_crypto_keyslot *slot;
+
+ slot = blk_crypto_find_keyslot(profile, key);
+ if (!slot)
+ return NULL;
+ if (atomic_inc_return(&slot->slot_refs) == 1) {
+ /* Took first reference to this slot; remove it from LRU list */
+ blk_crypto_remove_slot_from_lru_list(slot);
+ }
+ return slot;
+}
+
+/**
+ * blk_crypto_keyslot_index() - Get the index of a keyslot
+ * @slot: a keyslot that blk_crypto_get_keyslot() returned
+ *
+ * Return: the 0-based index of the keyslot within the device's keyslots.
+ */
+unsigned int blk_crypto_keyslot_index(struct blk_crypto_keyslot *slot)
+{
+ return slot - slot->profile->slots;
+}
+EXPORT_SYMBOL_GPL(blk_crypto_keyslot_index);
+
+/**
+ * blk_crypto_get_keyslot() - Get a keyslot for a key, if needed.
+ * @profile: the crypto profile of the device the key will be used on
+ * @key: the key that will be used
+ * @slot_ptr: If a keyslot is allocated, an opaque pointer to the keyslot struct
+ * will be stored here; otherwise NULL will be stored here.
+ *
+ * If the device has keyslots, this gets a keyslot that's been programmed with
+ * the specified key. If the key is already in a slot, this reuses it;
+ * otherwise this waits for a slot to become idle and programs the key into it.
+ *
+ * This must be paired with a call to blk_crypto_put_keyslot().
+ *
+ * Context: Process context. Takes and releases profile->lock.
+ * Return: BLK_STS_OK on success, meaning that either a keyslot was allocated or
+ * one wasn't needed; or a blk_status_t error on failure.
+ */
+blk_status_t blk_crypto_get_keyslot(struct blk_crypto_profile *profile,
+ const struct blk_crypto_key *key,
+ struct blk_crypto_keyslot **slot_ptr)
+{
+ struct blk_crypto_keyslot *slot;
+ int slot_idx;
+ int err;
+
+ *slot_ptr = NULL;
+
+ /*
+ * If the device has no concept of "keyslots", then there is no need to
+ * get one.
+ */
+ if (profile->num_slots == 0)
+ return BLK_STS_OK;
+
+ down_read(&profile->lock);
+ slot = blk_crypto_find_and_grab_keyslot(profile, key);
+ up_read(&profile->lock);
+ if (slot)
+ goto success;
+
+ for (;;) {
+ blk_crypto_hw_enter(profile);
+ slot = blk_crypto_find_and_grab_keyslot(profile, key);
+ if (slot) {
+ blk_crypto_hw_exit(profile);
+ goto success;
+ }
+
+ /*
+ * If we're here, that means there wasn't a slot that was
+ * already programmed with the key. So try to program it.
+ */
+ if (!list_empty(&profile->idle_slots))
+ break;
+
+ blk_crypto_hw_exit(profile);
+ wait_event(profile->idle_slots_wait_queue,
+ !list_empty(&profile->idle_slots));
+ }
+
+ slot = list_first_entry(&profile->idle_slots, struct blk_crypto_keyslot,
+ idle_slot_node);
+ slot_idx = blk_crypto_keyslot_index(slot);
+
+ err = profile->ll_ops.keyslot_program(profile, key, slot_idx);
+ if (err) {
+ wake_up(&profile->idle_slots_wait_queue);
+ blk_crypto_hw_exit(profile);
+ return errno_to_blk_status(err);
+ }
+
+ /* Move this slot to the hash list for the new key. */
+ if (slot->key)
+ hlist_del(&slot->hash_node);
+ slot->key = key;
+ hlist_add_head(&slot->hash_node,
+ blk_crypto_hash_bucket_for_key(profile, key));
+
+ atomic_set(&slot->slot_refs, 1);
+
+ blk_crypto_remove_slot_from_lru_list(slot);
+
+ blk_crypto_hw_exit(profile);
+success:
+ *slot_ptr = slot;
+ return BLK_STS_OK;
+}
+
+/**
+ * blk_crypto_put_keyslot() - Release a reference to a keyslot
+ * @slot: The keyslot to release the reference of (may be NULL).
+ *
+ * Context: Any context.
+ */
+void blk_crypto_put_keyslot(struct blk_crypto_keyslot *slot)
+{
+ struct blk_crypto_profile *profile;
+ unsigned long flags;
+
+ if (!slot)
+ return;
+
+ profile = slot->profile;
+
+ if (atomic_dec_and_lock_irqsave(&slot->slot_refs,
+ &profile->idle_slots_lock, flags)) {
+ list_add_tail(&slot->idle_slot_node, &profile->idle_slots);
+ spin_unlock_irqrestore(&profile->idle_slots_lock, flags);
+ wake_up(&profile->idle_slots_wait_queue);
+ }
+}
+
+/**
+ * __blk_crypto_cfg_supported() - Check whether the given crypto profile
+ * supports the given crypto configuration.
+ * @profile: the crypto profile to check
+ * @cfg: the crypto configuration to check for
+ *
+ * Return: %true if @profile supports the given @cfg.
+ */
+bool __blk_crypto_cfg_supported(struct blk_crypto_profile *profile,
+ const struct blk_crypto_config *cfg)
+{
+ if (!profile)
+ return false;
+ if (!(profile->modes_supported[cfg->crypto_mode] & cfg->data_unit_size))
+ return false;
+ if (profile->max_dun_bytes_supported < cfg->dun_bytes)
+ return false;
+ return true;
+}
+
+/**
+ * __blk_crypto_evict_key() - Evict a key from a device.
+ * @profile: the crypto profile of the device
+ * @key: the key to evict. It must not still be used in any I/O.
+ *
+ * If the device has keyslots, this finds the keyslot (if any) that contains the
+ * specified key and calls the driver's keyslot_evict function to evict it.
+ *
+ * Otherwise, this just calls the driver's keyslot_evict function if it is
+ * implemented, passing just the key (without any particular keyslot). This
+ * allows layered devices to evict the key from their underlying devices.
+ *
+ * Context: Process context. Takes and releases profile->lock.
+ * Return: 0 on success or if there's no keyslot with the specified key, -EBUSY
+ * if the keyslot is still in use, or another -errno value on other
+ * error.
+ */
+int __blk_crypto_evict_key(struct blk_crypto_profile *profile,
+ const struct blk_crypto_key *key)
+{
+ struct blk_crypto_keyslot *slot;
+ int err = 0;
+
+ if (profile->num_slots == 0) {
+ if (profile->ll_ops.keyslot_evict) {
+ blk_crypto_hw_enter(profile);
+ err = profile->ll_ops.keyslot_evict(profile, key, -1);
+ blk_crypto_hw_exit(profile);
+ return err;
+ }
+ return 0;
+ }
+
+ blk_crypto_hw_enter(profile);
+ slot = blk_crypto_find_keyslot(profile, key);
+ if (!slot)
+ goto out_unlock;
+
+ if (WARN_ON_ONCE(atomic_read(&slot->slot_refs) != 0)) {
+ err = -EBUSY;
+ goto out_unlock;
+ }
+ err = profile->ll_ops.keyslot_evict(profile, key,
+ blk_crypto_keyslot_index(slot));
+ if (err)
+ goto out_unlock;
+
+ hlist_del(&slot->hash_node);
+ slot->key = NULL;
+ err = 0;
+out_unlock:
+ blk_crypto_hw_exit(profile);
+ return err;
+}
+
+/**
+ * blk_crypto_reprogram_all_keys() - Re-program all keyslots.
+ * @profile: The crypto profile
+ *
+ * Re-program all keyslots that are supposed to have a key programmed. This is
+ * intended only for use by drivers for hardware that loses its keys on reset.
+ *
+ * Context: Process context. Takes and releases profile->lock.
+ */
+void blk_crypto_reprogram_all_keys(struct blk_crypto_profile *profile)
+{
+ unsigned int slot;
+
+ if (profile->num_slots == 0)
+ return;
+
+ /* This is for device initialization, so don't resume the device */
+ down_write(&profile->lock);
+ for (slot = 0; slot < profile->num_slots; slot++) {
+ const struct blk_crypto_key *key = profile->slots[slot].key;
+ int err;
+
+ if (!key)
+ continue;
+
+ err = profile->ll_ops.keyslot_program(profile, key, slot);
+ WARN_ON(err);
+ }
+ up_write(&profile->lock);
+}
+EXPORT_SYMBOL_GPL(blk_crypto_reprogram_all_keys);
+
+void blk_crypto_profile_destroy(struct blk_crypto_profile *profile)
+{
+ if (!profile)
+ return;
+ kvfree(profile->slot_hashtable);
+ kvfree_sensitive(profile->slots,
+ sizeof(profile->slots[0]) * profile->num_slots);
+ memzero_explicit(profile, sizeof(*profile));
+}
+EXPORT_SYMBOL_GPL(blk_crypto_profile_destroy);
+
+bool blk_crypto_register(struct blk_crypto_profile *profile,
+ struct request_queue *q)
+{
+ if (blk_integrity_queue_supports_integrity(q)) {
+ pr_warn("Integrity and hardware inline encryption are not supported together. Disabling hardware inline encryption.\n");
+ return false;
+ }
+ q->crypto_profile = profile;
+ return true;
+}
+EXPORT_SYMBOL_GPL(blk_crypto_register);
+
+void blk_crypto_unregister(struct request_queue *q)
+{
+ q->crypto_profile = NULL;
+}
+
+/**
+ * blk_crypto_intersect_capabilities() - restrict supported crypto capabilities
+ * by child device
+ * @parent: the crypto profile for the parent device
+ * @child: the crypto profile for the child device, or NULL
+ *
+ * This clears all crypto capabilities in @parent that aren't set in @child. If
+ * @child is NULL, then this clears all parent capabilities.
+ *
+ * Only use this when setting up the crypto profile for a layered device, before
+ * it's been exposed yet.
+ */
+void blk_crypto_intersect_capabilities(struct blk_crypto_profile *parent,
+ const struct blk_crypto_profile *child)
+{
+ if (child) {
+ unsigned int i;
+
+ parent->max_dun_bytes_supported =
+ min(parent->max_dun_bytes_supported,
+ child->max_dun_bytes_supported);
+ for (i = 0; i < ARRAY_SIZE(child->modes_supported); i++)
+ parent->modes_supported[i] &= child->modes_supported[i];
+ } else {
+ parent->max_dun_bytes_supported = 0;
+ memset(parent->modes_supported, 0,
+ sizeof(parent->modes_supported));
+ }
+}
+EXPORT_SYMBOL_GPL(blk_crypto_intersect_capabilities);
+
+/**
+ * blk_crypto_has_capabilities() - Check whether @target supports at least all
+ * the crypto capabilities that @reference does.
+ * @target: the target profile
+ * @reference: the reference profile
+ *
+ * Return: %true if @target supports all the crypto capabilities of @reference.
+ */
+bool blk_crypto_has_capabilities(const struct blk_crypto_profile *target,
+ const struct blk_crypto_profile *reference)
+{
+ int i;
+
+ if (!reference)
+ return true;
+
+ if (!target)
+ return false;
+
+ for (i = 0; i < ARRAY_SIZE(target->modes_supported); i++) {
+ if (reference->modes_supported[i] & ~target->modes_supported[i])
+ return false;
+ }
+
+ if (reference->max_dun_bytes_supported >
+ target->max_dun_bytes_supported)
+ return false;
+
+ return true;
+}
+EXPORT_SYMBOL_GPL(blk_crypto_has_capabilities);
+
+/**
+ * blk_crypto_update_capabilities() - Update the capabilities of a crypto
+ * profile to match those of another crypto
+ * profile.
+ * @dst: The crypto profile whose capabilities to update.
+ * @src: The crypto profile whose capabilities this function will update @dst's
+ * capabilities to.
+ *
+ * Blk-crypto requires that crypto capabilities that were
+ * advertised when a bio was created continue to be supported by the
+ * device until that bio is ended. This is turn means that a device cannot
+ * shrink its advertised crypto capabilities without any explicit
+ * synchronization with upper layers. So if there's no such explicit
+ * synchronization, @src must support all the crypto capabilities that
+ * @dst does (i.e. we need blk_crypto_has_capabilities(@src, @dst)).
+ *
+ * Note also that as long as the crypto capabilities are being expanded, the
+ * order of updates becoming visible is not important because it's alright
+ * for blk-crypto to see stale values - they only cause blk-crypto to
+ * believe that a crypto capability isn't supported when it actually is (which
+ * might result in blk-crypto-fallback being used if available, or the bio being
+ * failed).
+ */
+void blk_crypto_update_capabilities(struct blk_crypto_profile *dst,
+ const struct blk_crypto_profile *src)
+{
+ memcpy(dst->modes_supported, src->modes_supported,
+ sizeof(dst->modes_supported));
+
+ dst->max_dun_bytes_supported = src->max_dun_bytes_supported;
+}
+EXPORT_SYMBOL_GPL(blk_crypto_update_capabilities);
diff --git a/block/blk-crypto.c b/block/blk-crypto.c
index 8f53f4a1f9e2..ec9efeeeca91 100644
--- a/block/blk-crypto.c
+++ b/block/blk-crypto.c
@@ -11,7 +11,7 @@
#include <linux/bio.h>
#include <linux/blkdev.h>
-#include <linux/keyslot-manager.h>
+#include <linux/blk-crypto-profile.h>
#include <linux/module.h>
#include <linux/slab.h>
@@ -218,8 +218,9 @@ static bool bio_crypt_check_alignment(struct bio *bio)
blk_status_t __blk_crypto_init_request(struct request *rq)
{
- return blk_ksm_get_slot_for_key(rq->q->ksm, rq->crypt_ctx->bc_key,
- &rq->crypt_keyslot);
+ return blk_crypto_get_keyslot(rq->q->crypto_profile,
+ rq->crypt_ctx->bc_key,
+ &rq->crypt_keyslot);
}
/**
@@ -233,7 +234,7 @@ blk_status_t __blk_crypto_init_request(struct request *rq)
*/
void __blk_crypto_free_request(struct request *rq)
{
- blk_ksm_put_slot(rq->crypt_keyslot);
+ blk_crypto_put_keyslot(rq->crypt_keyslot);
mempool_free(rq->crypt_ctx, bio_crypt_ctx_pool);
blk_crypto_rq_set_defaults(rq);
}
@@ -264,6 +265,7 @@ bool __blk_crypto_bio_prep(struct bio **bio_ptr)
{
struct bio *bio = *bio_ptr;
const struct blk_crypto_key *bc_key = bio->bi_crypt_context->bc_key;
+ struct blk_crypto_profile *profile;
/* Error if bio has no data. */
if (WARN_ON_ONCE(!bio_has_data(bio))) {
@@ -280,8 +282,8 @@ bool __blk_crypto_bio_prep(struct bio **bio_ptr)
* Success if device supports the encryption context, or if we succeeded
* in falling back to the crypto API.
*/
- if (blk_ksm_crypto_cfg_supported(bdev_get_queue(bio->bi_bdev)->ksm,
- &bc_key->crypto_cfg))
+ profile = bdev_get_queue(bio->bi_bdev)->crypto_profile;
+ if (__blk_crypto_cfg_supported(profile, &bc_key->crypto_cfg))
return true;
if (blk_crypto_fallback_bio_prep(bio_ptr))
@@ -357,7 +359,7 @@ bool blk_crypto_config_supported(struct request_queue *q,
const struct blk_crypto_config *cfg)
{
return IS_ENABLED(CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK) ||
- blk_ksm_crypto_cfg_supported(q->ksm, cfg);
+ __blk_crypto_cfg_supported(q->crypto_profile, cfg);
}
/**
@@ -378,7 +380,7 @@ bool blk_crypto_config_supported(struct request_queue *q,
int blk_crypto_start_using_key(const struct blk_crypto_key *key,
struct request_queue *q)
{
- if (blk_ksm_crypto_cfg_supported(q->ksm, &key->crypto_cfg))
+ if (__blk_crypto_cfg_supported(q->crypto_profile, &key->crypto_cfg))
return 0;
return blk_crypto_fallback_start_using_mode(key->crypto_cfg.crypto_mode);
}
@@ -394,18 +396,17 @@ int blk_crypto_start_using_key(const struct blk_crypto_key *key,
* evicted from any hardware that it might have been programmed into. The key
* must not be in use by any in-flight IO when this function is called.
*
- * Return: 0 on success or if key is not present in the q's ksm, -err on error.
+ * Return: 0 on success or if the key wasn't in any keyslot; -errno on error.
*/
int blk_crypto_evict_key(struct request_queue *q,
const struct blk_crypto_key *key)
{
- if (blk_ksm_crypto_cfg_supported(q->ksm, &key->crypto_cfg))
- return blk_ksm_evict_key(q->ksm, key);
+ if (__blk_crypto_cfg_supported(q->crypto_profile, &key->crypto_cfg))
+ return __blk_crypto_evict_key(q->crypto_profile, key);
/*
- * If the request queue's associated inline encryption hardware didn't
- * have support for the key, then the key might have been programmed
- * into the fallback keyslot manager, so try to evict from there.
+ * If the request_queue didn't support the key, then blk-crypto-fallback
+ * may have been used, so try to evict the key from blk-crypto-fallback.
*/
return blk_crypto_fallback_evict_key(key);
}
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 4201728bf3a5..8e364bda5166 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -379,7 +379,7 @@ static void mq_flush_data_end_io(struct request *rq, blk_status_t error)
* @rq is being submitted. Analyze what needs to be done and put it on the
* right queue.
*/
-void blk_insert_flush(struct request *rq)
+bool blk_insert_flush(struct request *rq)
{
struct request_queue *q = rq->q;
unsigned long fflags = q->queue_flags; /* may change, cache */
@@ -409,7 +409,7 @@ void blk_insert_flush(struct request *rq)
*/
if (!policy) {
blk_mq_end_request(rq, 0);
- return;
+ return true;
}
BUG_ON(rq->bio != rq->biotail); /*assumes zero or single bio rq */
@@ -420,10 +420,8 @@ void blk_insert_flush(struct request *rq)
* for normal execution.
*/
if ((policy & REQ_FSEQ_DATA) &&
- !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
- blk_mq_request_bypass_insert(rq, false, false);
- return;
- }
+ !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH)))
+ return false;
/*
* @rq should go through flush machinery. Mark it part of flush
@@ -439,6 +437,8 @@ void blk_insert_flush(struct request *rq)
spin_lock_irq(&fq->mq_flush_lock);
blk_flush_complete_seq(rq, fq, REQ_FSEQ_ACTIONS & ~policy, 0);
spin_unlock_irq(&fq->mq_flush_lock);
+
+ return true;
}
/**
diff --git a/block/blk-ia-ranges.c b/block/blk-ia-ranges.c
new file mode 100644
index 000000000000..c246c425d0d7
--- /dev/null
+++ b/block/blk-ia-ranges.c
@@ -0,0 +1,348 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Block device concurrent positioning ranges.
+ *
+ * Copyright (C) 2021 Western Digital Corporation or its Affiliates.
+ */
+#include <linux/kernel.h>
+#include <linux/blkdev.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+
+#include "blk.h"
+
+static ssize_t
+blk_ia_range_sector_show(struct blk_independent_access_range *iar,
+ char *buf)
+{
+ return sprintf(buf, "%llu\n", iar->sector);
+}
+
+static ssize_t
+blk_ia_range_nr_sectors_show(struct blk_independent_access_range *iar,
+ char *buf)
+{
+ return sprintf(buf, "%llu\n", iar->nr_sectors);
+}
+
+struct blk_ia_range_sysfs_entry {
+ struct attribute attr;
+ ssize_t (*show)(struct blk_independent_access_range *iar, char *buf);
+};
+
+static struct blk_ia_range_sysfs_entry blk_ia_range_sector_entry = {
+ .attr = { .name = "sector", .mode = 0444 },
+ .show = blk_ia_range_sector_show,
+};
+
+static struct blk_ia_range_sysfs_entry blk_ia_range_nr_sectors_entry = {
+ .attr = { .name = "nr_sectors", .mode = 0444 },
+ .show = blk_ia_range_nr_sectors_show,
+};
+
+static struct attribute *blk_ia_range_attrs[] = {
+ &blk_ia_range_sector_entry.attr,
+ &blk_ia_range_nr_sectors_entry.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(blk_ia_range);
+
+static ssize_t blk_ia_range_sysfs_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct blk_ia_range_sysfs_entry *entry =
+ container_of(attr, struct blk_ia_range_sysfs_entry, attr);
+ struct blk_independent_access_range *iar =
+ container_of(kobj, struct blk_independent_access_range, kobj);
+ ssize_t ret;
+
+ mutex_lock(&iar->queue->sysfs_lock);
+ ret = entry->show(iar, buf);
+ mutex_unlock(&iar->queue->sysfs_lock);
+
+ return ret;
+}
+
+static const struct sysfs_ops blk_ia_range_sysfs_ops = {
+ .show = blk_ia_range_sysfs_show,
+};
+
+/*
+ * Independent access range entries are not freed individually, but alltogether
+ * with struct blk_independent_access_ranges and its array of ranges. Since
+ * kobject_add() takes a reference on the parent kobject contained in
+ * struct blk_independent_access_ranges, the array of independent access range
+ * entries cannot be freed until kobject_del() is called for all entries.
+ * So we do not need to do anything here, but still need this no-op release
+ * operation to avoid complaints from the kobject code.
+ */
+static void blk_ia_range_sysfs_nop_release(struct kobject *kobj)
+{
+}
+
+static struct kobj_type blk_ia_range_ktype = {
+ .sysfs_ops = &blk_ia_range_sysfs_ops,
+ .default_groups = blk_ia_range_groups,
+ .release = blk_ia_range_sysfs_nop_release,
+};
+
+/*
+ * This will be executed only after all independent access range entries are
+ * removed with kobject_del(), at which point, it is safe to free everything,
+ * including the array of ranges.
+ */
+static void blk_ia_ranges_sysfs_release(struct kobject *kobj)
+{
+ struct blk_independent_access_ranges *iars =
+ container_of(kobj, struct blk_independent_access_ranges, kobj);
+
+ kfree(iars);
+}
+
+static struct kobj_type blk_ia_ranges_ktype = {
+ .release = blk_ia_ranges_sysfs_release,
+};
+
+/**
+ * disk_register_ia_ranges - register with sysfs a set of independent
+ * access ranges
+ * @disk: Target disk
+ * @new_iars: New set of independent access ranges
+ *
+ * Register with sysfs a set of independent access ranges for @disk.
+ * If @new_iars is not NULL, this set of ranges is registered and the old set
+ * specified by q->ia_ranges is unregistered. Otherwise, q->ia_ranges is
+ * registered if it is not already.
+ */
+int disk_register_independent_access_ranges(struct gendisk *disk,
+ struct blk_independent_access_ranges *new_iars)
+{
+ struct request_queue *q = disk->queue;
+ struct blk_independent_access_ranges *iars;
+ int i, ret;
+
+ lockdep_assert_held(&q->sysfs_dir_lock);
+ lockdep_assert_held(&q->sysfs_lock);
+
+ /* If a new range set is specified, unregister the old one */
+ if (new_iars) {
+ if (q->ia_ranges)
+ disk_unregister_independent_access_ranges(disk);
+ q->ia_ranges = new_iars;
+ }
+
+ iars = q->ia_ranges;
+ if (!iars)
+ return 0;
+
+ /*
+ * At this point, iars is the new set of sector access ranges that needs
+ * to be registered with sysfs.
+ */
+ WARN_ON(iars->sysfs_registered);
+ ret = kobject_init_and_add(&iars->kobj, &blk_ia_ranges_ktype,
+ &q->kobj, "%s", "independent_access_ranges");
+ if (ret) {
+ q->ia_ranges = NULL;
+ kfree(iars);
+ return ret;
+ }
+
+ for (i = 0; i < iars->nr_ia_ranges; i++) {
+ iars->ia_range[i].queue = q;
+ ret = kobject_init_and_add(&iars->ia_range[i].kobj,
+ &blk_ia_range_ktype, &iars->kobj,
+ "%d", i);
+ if (ret) {
+ while (--i >= 0)
+ kobject_del(&iars->ia_range[i].kobj);
+ kobject_del(&iars->kobj);
+ kobject_put(&iars->kobj);
+ return ret;
+ }
+ }
+
+ iars->sysfs_registered = true;
+
+ return 0;
+}
+
+void disk_unregister_independent_access_ranges(struct gendisk *disk)
+{
+ struct request_queue *q = disk->queue;
+ struct blk_independent_access_ranges *iars = q->ia_ranges;
+ int i;
+
+ lockdep_assert_held(&q->sysfs_dir_lock);
+ lockdep_assert_held(&q->sysfs_lock);
+
+ if (!iars)
+ return;
+
+ if (iars->sysfs_registered) {
+ for (i = 0; i < iars->nr_ia_ranges; i++)
+ kobject_del(&iars->ia_range[i].kobj);
+ kobject_del(&iars->kobj);
+ kobject_put(&iars->kobj);
+ } else {
+ kfree(iars);
+ }
+
+ q->ia_ranges = NULL;
+}
+
+static struct blk_independent_access_range *
+disk_find_ia_range(struct blk_independent_access_ranges *iars,
+ sector_t sector)
+{
+ struct blk_independent_access_range *iar;
+ int i;
+
+ for (i = 0; i < iars->nr_ia_ranges; i++) {
+ iar = &iars->ia_range[i];
+ if (sector >= iar->sector &&
+ sector < iar->sector + iar->nr_sectors)
+ return iar;
+ }
+
+ return NULL;
+}
+
+static bool disk_check_ia_ranges(struct gendisk *disk,
+ struct blk_independent_access_ranges *iars)
+{
+ struct blk_independent_access_range *iar, *tmp;
+ sector_t capacity = get_capacity(disk);
+ sector_t sector = 0;
+ int i;
+
+ /*
+ * While sorting the ranges in increasing LBA order, check that the
+ * ranges do not overlap, that there are no sector holes and that all
+ * sectors belong to one range.
+ */
+ for (i = 0; i < iars->nr_ia_ranges; i++) {
+ tmp = disk_find_ia_range(iars, sector);
+ if (!tmp || tmp->sector != sector) {
+ pr_warn("Invalid non-contiguous independent access ranges\n");
+ return false;
+ }
+
+ iar = &iars->ia_range[i];
+ if (tmp != iar) {
+ swap(iar->sector, tmp->sector);
+ swap(iar->nr_sectors, tmp->nr_sectors);
+ }
+
+ sector += iar->nr_sectors;
+ }
+
+ if (sector != capacity) {
+ pr_warn("Independent access ranges do not match disk capacity\n");
+ return false;
+ }
+
+ return true;
+}
+
+static bool disk_ia_ranges_changed(struct gendisk *disk,
+ struct blk_independent_access_ranges *new)
+{
+ struct blk_independent_access_ranges *old = disk->queue->ia_ranges;
+ int i;
+
+ if (!old)
+ return true;
+
+ if (old->nr_ia_ranges != new->nr_ia_ranges)
+ return true;
+
+ for (i = 0; i < old->nr_ia_ranges; i++) {
+ if (new->ia_range[i].sector != old->ia_range[i].sector ||
+ new->ia_range[i].nr_sectors != old->ia_range[i].nr_sectors)
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * disk_alloc_independent_access_ranges - Allocate an independent access ranges
+ * data structure
+ * @disk: target disk
+ * @nr_ia_ranges: Number of independent access ranges
+ *
+ * Allocate a struct blk_independent_access_ranges structure with @nr_ia_ranges
+ * access range descriptors.
+ */
+struct blk_independent_access_ranges *
+disk_alloc_independent_access_ranges(struct gendisk *disk, int nr_ia_ranges)
+{
+ struct blk_independent_access_ranges *iars;
+
+ iars = kzalloc_node(struct_size(iars, ia_range, nr_ia_ranges),
+ GFP_KERNEL, disk->queue->node);
+ if (iars)
+ iars->nr_ia_ranges = nr_ia_ranges;
+ return iars;
+}
+EXPORT_SYMBOL_GPL(disk_alloc_independent_access_ranges);
+
+/**
+ * disk_set_independent_access_ranges - Set a disk independent access ranges
+ * @disk: target disk
+ * @iars: independent access ranges structure
+ *
+ * Set the independent access ranges information of the request queue
+ * of @disk to @iars. If @iars is NULL and the independent access ranges
+ * structure already set is cleared. If there are no differences between
+ * @iars and the independent access ranges structure already set, @iars
+ * is freed.
+ */
+void disk_set_independent_access_ranges(struct gendisk *disk,
+ struct blk_independent_access_ranges *iars)
+{
+ struct request_queue *q = disk->queue;
+
+ if (WARN_ON_ONCE(iars && !iars->nr_ia_ranges)) {
+ kfree(iars);
+ iars = NULL;
+ }
+
+ mutex_lock(&q->sysfs_dir_lock);
+ mutex_lock(&q->sysfs_lock);
+
+ if (iars) {
+ if (!disk_check_ia_ranges(disk, iars)) {
+ kfree(iars);
+ iars = NULL;
+ goto reg;
+ }
+
+ if (!disk_ia_ranges_changed(disk, iars)) {
+ kfree(iars);
+ goto unlock;
+ }
+ }
+
+ /*
+ * This may be called for a registered queue. E.g. during a device
+ * revalidation. If that is the case, we need to unregister the old
+ * set of independent access ranges and register the new set. If the
+ * queue is not registered, registration of the device request queue
+ * will register the independent access ranges, so only swap in the
+ * new set and free the old one.
+ */
+reg:
+ if (blk_queue_registered(q)) {
+ disk_register_independent_access_ranges(disk, iars);
+ } else {
+ swap(q->ia_ranges, iars);
+ kfree(iars);
+ }
+
+unlock:
+ mutex_unlock(&q->sysfs_lock);
+ mutex_unlock(&q->sysfs_dir_lock);
+}
+EXPORT_SYMBOL_GPL(disk_set_independent_access_ranges);
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index cef534a7cbc9..d670d54e5f7a 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -409,9 +409,9 @@ void blk_integrity_register(struct gendisk *disk, struct blk_integrity *template
blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, disk->queue);
#ifdef CONFIG_BLK_INLINE_ENCRYPTION
- if (disk->queue->ksm) {
+ if (disk->queue->crypto_profile) {
pr_warn("blk-integrity: Integrity and hardware inline encryption are not supported together. Disabling hardware inline encryption.\n");
- blk_ksm_unregister(disk->queue);
+ blk_crypto_unregister(disk->queue);
}
#endif
}
diff --git a/block/blk-merge.c b/block/blk-merge.c
index ec727234ac48..893c1a60b701 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -383,7 +383,7 @@ void __blk_queue_split(struct request_queue *q, struct bio **bio,
*/
void blk_queue_split(struct bio **bio)
{
- struct request_queue *q = (*bio)->bi_bdev->bd_disk->queue;
+ struct request_queue *q = bdev_get_queue((*bio)->bi_bdev);
unsigned int nr_segs;
if (blk_may_split(q, *bio))
@@ -1067,9 +1067,8 @@ static enum bio_merge_status blk_attempt_bio_merge(struct request_queue *q,
* @q: request_queue new bio is being queued at
* @bio: new bio being queued
* @nr_segs: number of segments in @bio
- * @same_queue_rq: pointer to &struct request that gets filled in when
- * another request associated with @q is found on the plug list
- * (optional, may be %NULL)
+ * @same_queue_rq: output value, will be true if there's an existing request
+ * from the passed in @q already in the plug list
*
* Determine whether @bio being queued on @q can be merged with the previous
* request on %current's plugged list. Returns %true if merge was successful,
@@ -1085,26 +1084,28 @@ static enum bio_merge_status blk_attempt_bio_merge(struct request_queue *q,
* Caller must ensure !blk_queue_nomerges(q) beforehand.
*/
bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
- unsigned int nr_segs, struct request **same_queue_rq)
+ unsigned int nr_segs, bool *same_queue_rq)
{
struct blk_plug *plug;
struct request *rq;
plug = blk_mq_plug(q, bio);
- if (!plug || list_empty(&plug->mq_list))
+ if (!plug || rq_list_empty(plug->mq_list))
return false;
/* check the previously added entry for a quick merge attempt */
- rq = list_last_entry(&plug->mq_list, struct request, queuelist);
- if (rq->q == q && same_queue_rq) {
+ rq = rq_list_peek(&plug->mq_list);
+ if (rq->q == q) {
/*
* Only blk-mq multiple hardware queues case checks the rq in
* the same queue, there should be only one such rq in a queue
*/
- *same_queue_rq = rq;
+ *same_queue_rq = true;
+
+ if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) ==
+ BIO_MERGE_OK)
+ return true;
}
- if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) == BIO_MERGE_OK)
- return true;
return false;
}
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 68ca5d21cda7..4f2cf8399f3d 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -124,7 +124,6 @@ static const char *const blk_queue_flag_name[] = {
QUEUE_FLAG_NAME(STATS),
QUEUE_FLAG_NAME(POLL_STATS),
QUEUE_FLAG_NAME(REGISTERED),
- QUEUE_FLAG_NAME(SCSI_PASSTHROUGH),
QUEUE_FLAG_NAME(QUIESCED),
QUEUE_FLAG_NAME(PCI_P2PDMA),
QUEUE_FLAG_NAME(ZONE_RESETALL),
@@ -309,6 +308,7 @@ static const char *const rqf_name[] = {
RQF_NAME(SPECIAL_PAYLOAD),
RQF_NAME(ZONE_WRITE_LOCKED),
RQF_NAME(MQ_POLL_SLEPT),
+ RQF_NAME(ELV),
};
#undef RQF_NAME
@@ -550,7 +550,7 @@ static int hctx_active_show(void *data, struct seq_file *m)
{
struct blk_mq_hw_ctx *hctx = data;
- seq_printf(m, "%d\n", atomic_read(&hctx->nr_active));
+ seq_printf(m, "%d\n", __blk_mq_active_requests(hctx));
return 0;
}
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index e85b7556b096..4be652fa38e7 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -361,7 +361,7 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
}
}
-bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
+bool blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
unsigned int nr_segs)
{
struct elevator_queue *e = q->elevator;
@@ -370,15 +370,20 @@ bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
bool ret = false;
enum hctx_type type;
- if (e && e->type->ops.bio_merge)
- return e->type->ops.bio_merge(q, bio, nr_segs);
+ if (bio_queue_enter(bio))
+ return false;
+
+ if (e && e->type->ops.bio_merge) {
+ ret = e->type->ops.bio_merge(q, bio, nr_segs);
+ goto out_put;
+ }
ctx = blk_mq_get_ctx(q);
hctx = blk_mq_map_queue(q, bio->bi_opf, ctx);
type = hctx->type;
if (!(hctx->flags & BLK_MQ_F_SHOULD_MERGE) ||
list_empty_careful(&ctx->rq_lists[type]))
- return false;
+ goto out_put;
/* default per sw-queue merge */
spin_lock(&ctx->lock);
@@ -391,6 +396,8 @@ bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
ret = true;
spin_unlock(&ctx->lock);
+out_put:
+ blk_queue_exit(q);
return ret;
}
@@ -497,7 +504,7 @@ void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx,
* busy in case of 'none' scheduler, and this way may save
* us one extra enqueue & dequeue to sw queue.
*/
- if (!hctx->dispatch_busy && !e && !run_queue_async) {
+ if (!hctx->dispatch_busy && !run_queue_async) {
blk_mq_try_issue_list_directly(hctx, list);
if (list_empty(list))
goto out;
@@ -541,7 +548,7 @@ static void blk_mq_sched_tags_teardown(struct request_queue *q, unsigned int fla
queue_for_each_hw_ctx(q, hctx, i) {
if (hctx->sched_tags) {
- if (!blk_mq_is_shared_tags(q->tag_set->flags))
+ if (!blk_mq_is_shared_tags(flags))
blk_mq_free_rq_map(hctx->sched_tags);
hctx->sched_tags = NULL;
}
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index 98836106b25f..25d1034952b6 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -12,7 +12,7 @@ void blk_mq_sched_assign_ioc(struct request *rq);
bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
unsigned int nr_segs, struct request **merged_request);
-bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
+bool blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
unsigned int nr_segs);
bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq,
struct list_head *free);
@@ -43,16 +43,6 @@ static inline bool bio_mergeable(struct bio *bio)
}
static inline bool
-blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
- unsigned int nr_segs)
-{
- if (blk_queue_nomerges(q) || !bio_mergeable(bio))
- return false;
-
- return __blk_mq_sched_bio_merge(q, bio, nr_segs);
-}
-
-static inline bool
blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq,
struct bio *bio)
{
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index b94c3e8ef392..995336abee33 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -399,9 +399,12 @@ void blk_mq_all_tag_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn,
void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
busy_tag_iter_fn *fn, void *priv)
{
- int i;
+ unsigned int flags = tagset->flags;
+ int i, nr_tags;
+
+ nr_tags = blk_mq_is_shared_tags(flags) ? 1 : tagset->nr_hw_queues;
- for (i = 0; i < tagset->nr_hw_queues; i++) {
+ for (i = 0; i < nr_tags; i++) {
if (tagset->tags && tagset->tags[i])
__blk_mq_all_tag_iter(tagset->tags[i], fn, priv,
BT_TAG_ITER_STARTED);
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index 78ae2fb8e2a4..df787b5a23bd 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -4,29 +4,6 @@
struct blk_mq_alloc_data;
-/*
- * Tag address space map.
- */
-struct blk_mq_tags {
- unsigned int nr_tags;
- unsigned int nr_reserved_tags;
-
- atomic_t active_queues;
-
- struct sbitmap_queue bitmap_tags;
- struct sbitmap_queue breserved_tags;
-
- struct request **rqs;
- struct request **static_rqs;
- struct list_head page_list;
-
- /*
- * used to clear request reference in rqs[] before freeing one
- * request pool
- */
- spinlock_t lock;
-};
-
extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags,
unsigned int reserved_tags,
int node, int alloc_policy);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 9248edd8a7d3..629cf421417f 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -19,7 +19,6 @@
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/llist.h>
-#include <linux/list_sort.h>
#include <linux/cpu.h>
#include <linux/cache.h>
#include <linux/sched/sysctl.h>
@@ -242,27 +241,28 @@ EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
*/
void blk_mq_quiesce_queue_nowait(struct request_queue *q)
{
- blk_queue_flag_set(QUEUE_FLAG_QUIESCED, q);
+ unsigned long flags;
+
+ spin_lock_irqsave(&q->queue_lock, flags);
+ if (!q->quiesce_depth++)
+ blk_queue_flag_set(QUEUE_FLAG_QUIESCED, q);
+ spin_unlock_irqrestore(&q->queue_lock, flags);
}
EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_nowait);
/**
- * blk_mq_quiesce_queue() - wait until all ongoing dispatches have finished
+ * blk_mq_wait_quiesce_done() - wait until in-progress quiesce is done
* @q: request queue.
*
- * Note: this function does not prevent that the struct request end_io()
- * callback function is invoked. Once this function is returned, we make
- * sure no dispatch can happen until the queue is unquiesced via
- * blk_mq_unquiesce_queue().
+ * Note: it is driver's responsibility for making sure that quiesce has
+ * been started.
*/
-void blk_mq_quiesce_queue(struct request_queue *q)
+void blk_mq_wait_quiesce_done(struct request_queue *q)
{
struct blk_mq_hw_ctx *hctx;
unsigned int i;
bool rcu = false;
- blk_mq_quiesce_queue_nowait(q);
-
queue_for_each_hw_ctx(q, hctx, i) {
if (hctx->flags & BLK_MQ_F_BLOCKING)
synchronize_srcu(hctx->srcu);
@@ -272,6 +272,22 @@ void blk_mq_quiesce_queue(struct request_queue *q)
if (rcu)
synchronize_rcu();
}
+EXPORT_SYMBOL_GPL(blk_mq_wait_quiesce_done);
+
+/**
+ * blk_mq_quiesce_queue() - wait until all ongoing dispatches have finished
+ * @q: request queue.
+ *
+ * Note: this function does not prevent that the struct request end_io()
+ * callback function is invoked. Once this function is returned, we make
+ * sure no dispatch can happen until the queue is unquiesced via
+ * blk_mq_unquiesce_queue().
+ */
+void blk_mq_quiesce_queue(struct request_queue *q)
+{
+ blk_mq_quiesce_queue_nowait(q);
+ blk_mq_wait_quiesce_done(q);
+}
EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);
/*
@@ -283,10 +299,21 @@ EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);
*/
void blk_mq_unquiesce_queue(struct request_queue *q)
{
- blk_queue_flag_clear(QUEUE_FLAG_QUIESCED, q);
+ unsigned long flags;
+ bool run_queue = false;
+
+ spin_lock_irqsave(&q->queue_lock, flags);
+ if (WARN_ON_ONCE(q->quiesce_depth <= 0)) {
+ ;
+ } else if (!--q->quiesce_depth) {
+ blk_queue_flag_clear(QUEUE_FLAG_QUIESCED, q);
+ run_queue = true;
+ }
+ spin_unlock_irqrestore(&q->queue_lock, flags);
/* dispatch requests which are inserted during quiescing */
- blk_mq_run_hw_queues(q, true);
+ if (run_queue)
+ blk_mq_run_hw_queues(q, true);
}
EXPORT_SYMBOL_GPL(blk_mq_unquiesce_queue);
@@ -301,40 +328,37 @@ void blk_mq_wake_waiters(struct request_queue *q)
}
static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
- unsigned int tag, u64 alloc_time_ns)
+ struct blk_mq_tags *tags, unsigned int tag, u64 alloc_time_ns)
{
struct blk_mq_ctx *ctx = data->ctx;
struct blk_mq_hw_ctx *hctx = data->hctx;
struct request_queue *q = data->q;
- struct elevator_queue *e = q->elevator;
- struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
struct request *rq = tags->static_rqs[tag];
- unsigned int rq_flags = 0;
- if (e) {
- rq_flags = RQF_ELV;
- rq->tag = BLK_MQ_NO_TAG;
- rq->internal_tag = tag;
- } else {
- rq->tag = tag;
- rq->internal_tag = BLK_MQ_NO_TAG;
- }
+ rq->q = q;
+ rq->mq_ctx = ctx;
+ rq->mq_hctx = hctx;
+ rq->cmd_flags = data->cmd_flags;
if (data->flags & BLK_MQ_REQ_PM)
- rq_flags |= RQF_PM;
+ data->rq_flags |= RQF_PM;
if (blk_queue_io_stat(q))
- rq_flags |= RQF_IO_STAT;
- rq->rq_flags = rq_flags;
+ data->rq_flags |= RQF_IO_STAT;
+ rq->rq_flags = data->rq_flags;
+
+ if (!(data->rq_flags & RQF_ELV)) {
+ rq->tag = tag;
+ rq->internal_tag = BLK_MQ_NO_TAG;
+ } else {
+ rq->tag = BLK_MQ_NO_TAG;
+ rq->internal_tag = tag;
+ }
+ rq->timeout = 0;
if (blk_mq_need_time_stamp(rq))
rq->start_time_ns = ktime_get_ns();
else
rq->start_time_ns = 0;
- /* csd/requeue_work/fifo_time is initialized before use */
- rq->q = q;
- rq->mq_ctx = ctx;
- rq->mq_hctx = hctx;
- rq->cmd_flags = data->cmd_flags;
rq->rq_disk = NULL;
rq->part = NULL;
#ifdef CONFIG_BLK_RQ_ALLOC_TIME
@@ -346,7 +370,6 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
#if defined(CONFIG_BLK_DEV_INTEGRITY)
rq->nr_integrity_segments = 0;
#endif
- rq->timeout = 0;
rq->end_io = NULL;
rq->end_io_data = NULL;
@@ -381,22 +404,28 @@ __blk_mq_alloc_requests_batch(struct blk_mq_alloc_data *data,
u64 alloc_time_ns)
{
unsigned int tag, tag_offset;
+ struct blk_mq_tags *tags;
struct request *rq;
- unsigned long tags;
+ unsigned long tag_mask;
int i, nr = 0;
- tags = blk_mq_get_tags(data, data->nr_tags, &tag_offset);
- if (unlikely(!tags))
+ tag_mask = blk_mq_get_tags(data, data->nr_tags, &tag_offset);
+ if (unlikely(!tag_mask))
return NULL;
- for (i = 0; tags; i++) {
- if (!(tags & (1UL << i)))
+ tags = blk_mq_tags_from_data(data);
+ for (i = 0; tag_mask; i++) {
+ if (!(tag_mask & (1UL << i)))
continue;
tag = tag_offset + i;
- tags &= ~(1UL << i);
- rq = blk_mq_rq_ctx_init(data, tag, alloc_time_ns);
+ prefetch(tags->static_rqs[tag]);
+ tag_mask &= ~(1UL << i);
+ rq = blk_mq_rq_ctx_init(data, tags, tag, alloc_time_ns);
rq_list_add(data->cached_rq, rq);
+ nr++;
}
+ /* caller already holds a reference, add for remainder */
+ percpu_ref_get_many(&data->q->q_usage_counter, nr - 1);
data->nr_tags -= nr;
return rq_list_pop(data->cached_rq);
@@ -405,7 +434,6 @@ __blk_mq_alloc_requests_batch(struct blk_mq_alloc_data *data,
static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
{
struct request_queue *q = data->q;
- struct elevator_queue *e = q->elevator;
u64 alloc_time_ns = 0;
struct request *rq;
unsigned int tag;
@@ -417,7 +445,11 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
if (data->cmd_flags & REQ_NOWAIT)
data->flags |= BLK_MQ_REQ_NOWAIT;
- if (e) {
+ if (q->elevator) {
+ struct elevator_queue *e = q->elevator;
+
+ data->rq_flags |= RQF_ELV;
+
/*
* Flush/passthrough requests are special and go directly to the
* dispatch list. Don't include reserved tags in the
@@ -433,7 +465,7 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
retry:
data->ctx = blk_mq_get_ctx(q);
data->hctx = blk_mq_map_queue(q, data->cmd_flags, data->ctx);
- if (!e)
+ if (!(data->rq_flags & RQF_ELV))
blk_mq_tag_busy(data->hctx);
/*
@@ -465,7 +497,8 @@ retry:
goto retry;
}
- return blk_mq_rq_ctx_init(data, tag, alloc_time_ns);
+ return blk_mq_rq_ctx_init(data, blk_mq_tags_from_data(data), tag,
+ alloc_time_ns);
}
struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
@@ -544,12 +577,15 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
if (!q->elevator)
blk_mq_tag_busy(data.hctx);
+ else
+ data.rq_flags |= RQF_ELV;
ret = -EWOULDBLOCK;
tag = blk_mq_get_tag(&data);
if (tag == BLK_MQ_NO_TAG)
goto out_queue_exit;
- return blk_mq_rq_ctx_init(&data, tag, alloc_time_ns);
+ return blk_mq_rq_ctx_init(&data, blk_mq_tags_from_data(&data), tag,
+ alloc_time_ns);
out_queue_exit:
blk_queue_exit(q);
@@ -580,7 +616,7 @@ void blk_mq_free_request(struct request *rq)
struct request_queue *q = rq->q;
struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
- if (rq->rq_flags & (RQF_ELVPRIV | RQF_ELV)) {
+ if (rq->rq_flags & RQF_ELVPRIV) {
struct elevator_queue *e = q->elevator;
if (e->type->ops.finish_request)
@@ -609,34 +645,30 @@ void blk_mq_free_plug_rqs(struct blk_plug *plug)
{
struct request *rq;
- while ((rq = rq_list_pop(&plug->cached_rq)) != NULL) {
- percpu_ref_get(&rq->q->q_usage_counter);
+ while ((rq = rq_list_pop(&plug->cached_rq)) != NULL)
blk_mq_free_request(rq);
- }
}
static void req_bio_endio(struct request *rq, struct bio *bio,
unsigned int nbytes, blk_status_t error)
{
- if (error)
+ if (unlikely(error)) {
bio->bi_status = error;
-
- if (unlikely(rq->rq_flags & RQF_QUIET))
- bio_set_flag(bio, BIO_QUIET);
-
- bio_advance(bio, nbytes);
-
- if (req_op(rq) == REQ_OP_ZONE_APPEND && error == BLK_STS_OK) {
+ } else if (req_op(rq) == REQ_OP_ZONE_APPEND) {
/*
* Partial zone append completions cannot be supported as the
* BIO fragments may end up not being written sequentially.
*/
- if (bio->bi_iter.bi_size)
+ if (bio->bi_iter.bi_size != nbytes)
bio->bi_status = BLK_STS_IOERR;
else
bio->bi_iter.bi_sector = rq->__sector;
}
+ bio_advance(bio, nbytes);
+
+ if (unlikely(rq->rq_flags & RQF_QUIET))
+ bio_set_flag(bio, BIO_QUIET);
/* don't actually finish bio if it's part of flush sequence */
if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ))
bio_endio(bio);
@@ -680,7 +712,7 @@ bool blk_update_request(struct request *req, blk_status_t error,
{
int total_bytes;
- trace_block_rq_complete(req, blk_status_to_errno(error), nr_bytes);
+ trace_block_rq_complete(req, error, nr_bytes);
if (!req->bio)
return false;
@@ -799,6 +831,13 @@ static inline void blk_mq_flush_tag_batch(struct blk_mq_hw_ctx *hctx,
{
struct request_queue *q = hctx->queue;
+ /*
+ * All requests should have been marked as RQF_MQ_INFLIGHT, so
+ * update hctx->nr_active in batch
+ */
+ if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)
+ __blk_mq_sub_active_requests(hctx, nr_tags);
+
blk_mq_put_tags(hctx->tags, tag_array, nr_tags);
percpu_ref_put_many(&q->q_usage_counter, nr_tags);
}
@@ -806,7 +845,7 @@ static inline void blk_mq_flush_tag_batch(struct blk_mq_hw_ctx *hctx,
void blk_mq_end_request_batch(struct io_comp_batch *iob)
{
int tags[TAG_COMP_BATCH], nr_tags = 0;
- struct blk_mq_hw_ctx *last_hctx = NULL;
+ struct blk_mq_hw_ctx *cur_hctx = NULL;
struct request *rq;
u64 now = 0;
@@ -829,17 +868,17 @@ void blk_mq_end_request_batch(struct io_comp_batch *iob)
blk_pm_mark_last_busy(rq);
rq_qos_done(rq->q, rq);
- if (nr_tags == TAG_COMP_BATCH ||
- (last_hctx && last_hctx != rq->mq_hctx)) {
- blk_mq_flush_tag_batch(last_hctx, tags, nr_tags);
+ if (nr_tags == TAG_COMP_BATCH || cur_hctx != rq->mq_hctx) {
+ if (cur_hctx)
+ blk_mq_flush_tag_batch(cur_hctx, tags, nr_tags);
nr_tags = 0;
+ cur_hctx = rq->mq_hctx;
}
tags[nr_tags++] = rq->tag;
- last_hctx = rq->mq_hctx;
}
if (nr_tags)
- blk_mq_flush_tag_batch(last_hctx, tags, nr_tags);
+ blk_mq_flush_tag_batch(cur_hctx, tags, nr_tags);
}
EXPORT_SYMBOL_GPL(blk_mq_end_request_batch);
@@ -1040,7 +1079,6 @@ void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list)
/* this request will be re-inserted to io scheduler queue */
blk_mq_sched_requeue_request(rq);
- BUG_ON(!list_empty(&rq->queuelist));
blk_mq_add_to_requeue_list(rq, true, kick_requeue_list);
}
EXPORT_SYMBOL(blk_mq_requeue_request);
@@ -1121,17 +1159,6 @@ void blk_mq_delay_kick_requeue_list(struct request_queue *q,
}
EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list);
-struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
-{
- if (tag < tags->nr_tags) {
- prefetch(tags->rqs[tag]);
- return tags->rqs[tag];
- }
-
- return NULL;
-}
-EXPORT_SYMBOL(blk_mq_tag_to_rq);
-
static bool blk_mq_rq_inflight(struct blk_mq_hw_ctx *hctx, struct request *rq,
void *priv, bool reserved)
{
@@ -1336,7 +1363,7 @@ struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx,
return data.rq;
}
-static bool __blk_mq_get_driver_tag(struct request *rq)
+static bool __blk_mq_alloc_driver_tag(struct request *rq)
{
struct sbitmap_queue *bt = &rq->mq_hctx->tags->bitmap_tags;
unsigned int tag_offset = rq->mq_hctx->tags->nr_reserved_tags;
@@ -1360,11 +1387,9 @@ static bool __blk_mq_get_driver_tag(struct request *rq)
return true;
}
-bool blk_mq_get_driver_tag(struct request *rq)
+bool __blk_mq_get_driver_tag(struct blk_mq_hw_ctx *hctx, struct request *rq)
{
- struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
-
- if (rq->tag == BLK_MQ_NO_TAG && !__blk_mq_get_driver_tag(rq))
+ if (rq->tag == BLK_MQ_NO_TAG && !__blk_mq_alloc_driver_tag(rq))
return false;
if ((hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) &&
@@ -1594,6 +1619,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
int errors, queued;
blk_status_t ret = BLK_STS_OK;
LIST_HEAD(zone_list);
+ bool needs_resource = false;
if (list_empty(list))
return false;
@@ -1639,6 +1665,8 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
queued++;
break;
case BLK_STS_RESOURCE:
+ needs_resource = true;
+ fallthrough;
case BLK_STS_DEV_RESOURCE:
blk_mq_handle_dev_resource(rq, list);
goto out;
@@ -1649,6 +1677,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
* accept.
*/
blk_mq_handle_zone_resource(rq, &zone_list);
+ needs_resource = true;
break;
default:
errors++;
@@ -1673,7 +1702,6 @@ out:
/* For non-shared tags, the RESTART check will suffice */
bool no_tag = prep == PREP_DISPATCH_NO_TAG &&
(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED);
- bool no_budget_avail = prep == PREP_DISPATCH_NO_BUDGET;
if (nr_budgets)
blk_mq_release_budgets(q, list);
@@ -1714,14 +1742,16 @@ out:
* If driver returns BLK_STS_RESOURCE and SCHED_RESTART
* bit is set, run queue after a delay to avoid IO stalls
* that could otherwise occur if the queue is idle. We'll do
- * similar if we couldn't get budget and SCHED_RESTART is set.
+ * similar if we couldn't get budget or couldn't lock a zone
+ * and SCHED_RESTART is set.
*/
needs_restart = blk_mq_sched_needs_restart(hctx);
+ if (prep == PREP_DISPATCH_NO_BUDGET)
+ needs_resource = true;
if (!needs_restart ||
(no_tag && list_empty_careful(&hctx->dispatch_wait.entry)))
blk_mq_run_hw_queue(hctx, true);
- else if (needs_restart && (ret == BLK_STS_RESOURCE ||
- no_budget_avail))
+ else if (needs_restart && needs_resource)
blk_mq_delay_run_hw_queue(hctx, BLK_MQ_RESOURCE_DELAY);
blk_mq_update_dispatch_busy(hctx, true);
@@ -2161,54 +2191,106 @@ void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
spin_unlock(&ctx->lock);
}
-static int plug_rq_cmp(void *priv, const struct list_head *a,
- const struct list_head *b)
+static void blk_mq_commit_rqs(struct blk_mq_hw_ctx *hctx, int *queued,
+ bool from_schedule)
+{
+ if (hctx->queue->mq_ops->commit_rqs) {
+ trace_block_unplug(hctx->queue, *queued, !from_schedule);
+ hctx->queue->mq_ops->commit_rqs(hctx);
+ }
+ *queued = 0;
+}
+
+static void blk_mq_plug_issue_direct(struct blk_plug *plug, bool from_schedule)
{
- struct request *rqa = container_of(a, struct request, queuelist);
- struct request *rqb = container_of(b, struct request, queuelist);
+ struct blk_mq_hw_ctx *hctx = NULL;
+ struct request *rq;
+ int queued = 0;
+ int errors = 0;
- if (rqa->mq_ctx != rqb->mq_ctx)
- return rqa->mq_ctx > rqb->mq_ctx;
- if (rqa->mq_hctx != rqb->mq_hctx)
- return rqa->mq_hctx > rqb->mq_hctx;
+ while ((rq = rq_list_pop(&plug->mq_list))) {
+ bool last = rq_list_empty(plug->mq_list);
+ blk_status_t ret;
+
+ if (hctx != rq->mq_hctx) {
+ if (hctx)
+ blk_mq_commit_rqs(hctx, &queued, from_schedule);
+ hctx = rq->mq_hctx;
+ }
+
+ ret = blk_mq_request_issue_directly(rq, last);
+ switch (ret) {
+ case BLK_STS_OK:
+ queued++;
+ break;
+ case BLK_STS_RESOURCE:
+ case BLK_STS_DEV_RESOURCE:
+ blk_mq_request_bypass_insert(rq, false, last);
+ blk_mq_commit_rqs(hctx, &queued, from_schedule);
+ return;
+ default:
+ blk_mq_end_request(rq, ret);
+ errors++;
+ break;
+ }
+ }
- return blk_rq_pos(rqa) > blk_rq_pos(rqb);
+ /*
+ * If we didn't flush the entire list, we could have told the driver
+ * there was more coming, but that turned out to be a lie.
+ */
+ if (errors)
+ blk_mq_commit_rqs(hctx, &queued, from_schedule);
}
void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
{
+ struct blk_mq_hw_ctx *this_hctx;
+ struct blk_mq_ctx *this_ctx;
+ unsigned int depth;
LIST_HEAD(list);
- if (list_empty(&plug->mq_list))
+ if (rq_list_empty(plug->mq_list))
return;
- list_splice_init(&plug->mq_list, &list);
-
- if (plug->rq_count > 2 && plug->multiple_queues)
- list_sort(NULL, &list, plug_rq_cmp);
-
plug->rq_count = 0;
+ if (!plug->multiple_queues && !plug->has_elevator && !from_schedule) {
+ blk_mq_plug_issue_direct(plug, false);
+ if (rq_list_empty(plug->mq_list))
+ return;
+ }
+
+ this_hctx = NULL;
+ this_ctx = NULL;
+ depth = 0;
do {
- struct list_head rq_list;
- struct request *rq, *head_rq = list_entry_rq(list.next);
- struct list_head *pos = &head_rq->queuelist; /* skip first */
- struct blk_mq_hw_ctx *this_hctx = head_rq->mq_hctx;
- struct blk_mq_ctx *this_ctx = head_rq->mq_ctx;
- unsigned int depth = 1;
-
- list_for_each_continue(pos, &list) {
- rq = list_entry_rq(pos);
- BUG_ON(!rq->q);
- if (rq->mq_hctx != this_hctx || rq->mq_ctx != this_ctx)
- break;
- depth++;
+ struct request *rq;
+
+ rq = rq_list_pop(&plug->mq_list);
+
+ if (!this_hctx) {
+ this_hctx = rq->mq_hctx;
+ this_ctx = rq->mq_ctx;
+ } else if (this_hctx != rq->mq_hctx || this_ctx != rq->mq_ctx) {
+ trace_block_unplug(this_hctx->queue, depth,
+ !from_schedule);
+ blk_mq_sched_insert_requests(this_hctx, this_ctx,
+ &list, from_schedule);
+ depth = 0;
+ this_hctx = rq->mq_hctx;
+ this_ctx = rq->mq_ctx;
+
}
- list_cut_before(&rq_list, &list, pos);
- trace_block_unplug(head_rq->q, depth, !from_schedule);
- blk_mq_sched_insert_requests(this_hctx, this_ctx, &rq_list,
+ list_add(&rq->queuelist, &list);
+ depth++;
+ } while (!rq_list_empty(plug->mq_list));
+
+ if (!list_empty(&list)) {
+ trace_block_unplug(this_hctx->queue, depth, !from_schedule);
+ blk_mq_sched_insert_requests(this_hctx, this_ctx, &list,
from_schedule);
- } while(!list_empty(&list));
+ }
}
static void blk_mq_bio_to_request(struct request *rq, struct bio *bio,
@@ -2388,16 +2470,17 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
{
- list_add_tail(&rq->queuelist, &plug->mq_list);
- plug->rq_count++;
- if (!plug->multiple_queues && !list_is_singular(&plug->mq_list)) {
- struct request *tmp;
+ if (!plug->multiple_queues) {
+ struct request *nxt = rq_list_peek(&plug->mq_list);
- tmp = list_first_entry(&plug->mq_list, struct request,
- queuelist);
- if (tmp->q != rq->q)
+ if (nxt && nxt->q != rq->q)
plug->multiple_queues = true;
}
+ if (!plug->has_elevator && (rq->rq_flags & RQF_ELV))
+ plug->has_elevator = true;
+ rq->rq_next = NULL;
+ rq_list_add(&plug->mq_list, rq);
+ plug->rq_count++;
}
/*
@@ -2412,6 +2495,83 @@ static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug)
return BLK_MAX_REQUEST_COUNT;
}
+static bool blk_attempt_bio_merge(struct request_queue *q, struct bio *bio,
+ unsigned int nr_segs, bool *same_queue_rq)
+{
+ if (!blk_queue_nomerges(q) && bio_mergeable(bio)) {
+ if (blk_attempt_plug_merge(q, bio, nr_segs, same_queue_rq))
+ return true;
+ if (blk_mq_sched_bio_merge(q, bio, nr_segs))
+ return true;
+ }
+ return false;
+}
+
+static struct request *blk_mq_get_new_requests(struct request_queue *q,
+ struct blk_plug *plug,
+ struct bio *bio,
+ unsigned int nsegs,
+ bool *same_queue_rq)
+{
+ struct blk_mq_alloc_data data = {
+ .q = q,
+ .nr_tags = 1,
+ .cmd_flags = bio->bi_opf,
+ };
+ struct request *rq;
+
+ if (unlikely(bio_queue_enter(bio)))
+ return NULL;
+ if (unlikely(!submit_bio_checks(bio)))
+ goto put_exit;
+ if (blk_attempt_bio_merge(q, bio, nsegs, same_queue_rq))
+ goto put_exit;
+
+ rq_qos_throttle(q, bio);
+
+ if (plug) {
+ data.nr_tags = plug->nr_ios;
+ plug->nr_ios = 1;
+ data.cached_rq = &plug->cached_rq;
+ }
+
+ rq = __blk_mq_alloc_requests(&data);
+ if (rq)
+ return rq;
+
+ rq_qos_cleanup(q, bio);
+ if (bio->bi_opf & REQ_NOWAIT)
+ bio_wouldblock_error(bio);
+put_exit:
+ blk_queue_exit(q);
+ return NULL;
+}
+
+static inline struct request *blk_mq_get_request(struct request_queue *q,
+ struct blk_plug *plug,
+ struct bio *bio,
+ unsigned int nsegs,
+ bool *same_queue_rq)
+{
+ if (plug) {
+ struct request *rq;
+
+ rq = rq_list_peek(&plug->cached_rq);
+ if (rq && rq->q == q) {
+ if (unlikely(!submit_bio_checks(bio)))
+ return NULL;
+ if (blk_attempt_bio_merge(q, bio, nsegs, same_queue_rq))
+ return NULL;
+ plug->cached_rq = rq_list_next(rq);
+ INIT_LIST_HEAD(&rq->queuelist);
+ rq_qos_throttle(q, bio);
+ return rq;
+ }
+ }
+
+ return blk_mq_get_new_requests(q, plug, bio, nsegs, same_queue_rq);
+}
+
/**
* blk_mq_submit_bio - Create and send a request to block device.
* @bio: Bio pointer.
@@ -2429,53 +2589,26 @@ void blk_mq_submit_bio(struct bio *bio)
{
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
const int is_sync = op_is_sync(bio->bi_opf);
- const int is_flush_fua = op_is_flush(bio->bi_opf);
struct request *rq;
struct blk_plug *plug;
- struct request *same_queue_rq = NULL;
+ bool same_queue_rq = false;
unsigned int nr_segs = 1;
blk_status_t ret;
+ if (unlikely(!blk_crypto_bio_prep(&bio)))
+ return;
+
blk_queue_bounce(q, &bio);
if (blk_may_split(q, bio))
__blk_queue_split(q, &bio, &nr_segs);
if (!bio_integrity_prep(bio))
- goto queue_exit;
-
- if (!is_flush_fua && !blk_queue_nomerges(q) &&
- blk_attempt_plug_merge(q, bio, nr_segs, &same_queue_rq))
- goto queue_exit;
-
- if (blk_mq_sched_bio_merge(q, bio, nr_segs))
- goto queue_exit;
-
- rq_qos_throttle(q, bio);
+ return;
plug = blk_mq_plug(q, bio);
- if (plug && plug->cached_rq) {
- rq = rq_list_pop(&plug->cached_rq);
- INIT_LIST_HEAD(&rq->queuelist);
- } else {
- struct blk_mq_alloc_data data = {
- .q = q,
- .nr_tags = 1,
- .cmd_flags = bio->bi_opf,
- };
-
- if (plug) {
- data.nr_tags = plug->nr_ios;
- plug->nr_ios = 1;
- data.cached_rq = &plug->cached_rq;
- }
- rq = __blk_mq_alloc_requests(&data);
- if (unlikely(!rq)) {
- rq_qos_cleanup(q, bio);
- if (bio->bi_opf & REQ_NOWAIT)
- bio_wouldblock_error(bio);
- goto queue_exit;
- }
- }
+ rq = blk_mq_get_request(q, plug, bio, nr_segs, &same_queue_rq);
+ if (unlikely(!rq))
+ return;
trace_block_getrq(bio);
@@ -2491,14 +2624,12 @@ void blk_mq_submit_bio(struct bio *bio)
return;
}
- if (unlikely(is_flush_fua)) {
- struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
- /* Bypass scheduler for flush requests */
- blk_insert_flush(rq);
- blk_mq_run_hw_queue(hctx, true);
- } else if (plug && (q->nr_hw_queues == 1 ||
- blk_mq_is_shared_tags(rq->mq_hctx->flags) ||
- q->mq_ops->commit_rqs || !blk_queue_nonrot(q))) {
+ if (op_is_flush(bio->bi_opf) && blk_insert_flush(rq))
+ return;
+
+ if (plug && (q->nr_hw_queues == 1 ||
+ blk_mq_is_shared_tags(rq->mq_hctx->flags) ||
+ q->mq_ops->commit_rqs || !blk_queue_nonrot(q))) {
/*
* Use plugging if we have a ->commit_rqs() hook as well, as
* we know the driver uses bd->last in a smart fashion.
@@ -2509,14 +2640,16 @@ void blk_mq_submit_bio(struct bio *bio)
unsigned int request_count = plug->rq_count;
struct request *last = NULL;
- if (!request_count)
+ if (!request_count) {
trace_block_plug(q);
- else
- last = list_entry_rq(plug->mq_list.prev);
+ } else if (!blk_queue_nomerges(q)) {
+ last = rq_list_peek(&plug->mq_list);
+ if (blk_rq_bytes(last) < BLK_PLUG_FLUSH_SIZE)
+ last = NULL;
+ }
- if (request_count >= blk_plug_max_rq_count(plug) || (last &&
- blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) {
- blk_flush_plug_list(plug, false);
+ if (request_count >= blk_plug_max_rq_count(plug) || last) {
+ blk_mq_flush_plug_list(plug, false);
trace_block_plug(q);
}
@@ -2525,6 +2658,8 @@ void blk_mq_submit_bio(struct bio *bio)
/* Insert the request at the IO scheduler queue */
blk_mq_sched_insert_request(rq, false, true, true);
} else if (plug && !blk_queue_nomerges(q)) {
+ struct request *next_rq = NULL;
+
/*
* We do limited plugging. If the bio can be merged, do that.
* Otherwise the existing request in the plug list will be
@@ -2532,19 +2667,16 @@ void blk_mq_submit_bio(struct bio *bio)
* The plug list might get flushed before this. If that happens,
* the plug list is empty, and same_queue_rq is invalid.
*/
- if (list_empty(&plug->mq_list))
- same_queue_rq = NULL;
if (same_queue_rq) {
- list_del_init(&same_queue_rq->queuelist);
+ next_rq = rq_list_pop(&plug->mq_list);
plug->rq_count--;
}
blk_add_rq_to_plug(plug, rq);
trace_block_plug(q);
- if (same_queue_rq) {
+ if (next_rq) {
trace_block_unplug(q, 1, true);
- blk_mq_try_issue_directly(same_queue_rq->mq_hctx,
- same_queue_rq);
+ blk_mq_try_issue_directly(next_rq->mq_hctx, next_rq);
}
} else if ((q->nr_hw_queues > 1 && is_sync) ||
!rq->mq_hctx->dispatch_busy) {
@@ -2557,10 +2689,6 @@ void blk_mq_submit_bio(struct bio *bio)
/* Default case. */
blk_mq_sched_insert_request(rq, false, true, true);
}
-
- return;
-queue_exit:
- blk_queue_exit(q);
}
static size_t order_to_size(unsigned int order)
@@ -3546,7 +3674,6 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
struct blk_mq_hw_ctx *hctx = hctxs[j];
if (hctx) {
- __blk_mq_free_map_and_rqs(set, j);
blk_mq_exit_hctx(q, set, hctx, j);
hctxs[j] = NULL;
}
@@ -4054,8 +4181,13 @@ fallback:
list_for_each_entry(q, &set->tag_list, tag_set_list) {
blk_mq_realloc_hw_ctxs(set, q);
if (q->nr_hw_queues != set->nr_hw_queues) {
+ int i = prev_nr_hw_queues;
+
pr_warn("Increasing nr_hw_queues to %d fails, fallback to %d\n",
nr_hw_queues, prev_nr_hw_queues);
+ for (; i < set->nr_hw_queues; i++)
+ __blk_mq_free_map_and_rqs(set, i);
+
set->nr_hw_queues = prev_nr_hw_queues;
blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
goto fallback;
diff --git a/block/blk-mq.h b/block/blk-mq.h
index ebf67f4d4f2e..cb0b5482ca5e 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -122,6 +122,7 @@ extern int blk_mq_sysfs_register(struct request_queue *q);
extern void blk_mq_sysfs_unregister(struct request_queue *q);
extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx);
void blk_mq_free_plug_rqs(struct blk_plug *plug);
+void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);
void blk_mq_release(struct request_queue *q);
@@ -148,6 +149,7 @@ struct blk_mq_alloc_data {
blk_mq_req_flags_t flags;
unsigned int shallow_depth;
unsigned int cmd_flags;
+ unsigned int rq_flags;
/* allocate multiple requests/tags in one go */
unsigned int nr_tags;
@@ -165,10 +167,9 @@ static inline bool blk_mq_is_shared_tags(unsigned int flags)
static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data *data)
{
- if (data->q->elevator)
- return data->hctx->sched_tags;
-
- return data->hctx->tags;
+ if (!(data->rq_flags & RQF_ELV))
+ return data->hctx->tags;
+ return data->hctx->sched_tags;
}
static inline bool blk_mq_hctx_stopped(struct blk_mq_hw_ctx *hctx)
@@ -224,12 +225,18 @@ static inline void __blk_mq_inc_active_requests(struct blk_mq_hw_ctx *hctx)
atomic_inc(&hctx->nr_active);
}
-static inline void __blk_mq_dec_active_requests(struct blk_mq_hw_ctx *hctx)
+static inline void __blk_mq_sub_active_requests(struct blk_mq_hw_ctx *hctx,
+ int val)
{
if (blk_mq_is_shared_tags(hctx->flags))
- atomic_dec(&hctx->queue->nr_active_requests_shared_tags);
+ atomic_sub(val, &hctx->queue->nr_active_requests_shared_tags);
else
- atomic_dec(&hctx->nr_active);
+ atomic_sub(val, &hctx->nr_active);
+}
+
+static inline void __blk_mq_dec_active_requests(struct blk_mq_hw_ctx *hctx)
+{
+ __blk_mq_sub_active_requests(hctx, 1);
}
static inline int __blk_mq_active_requests(struct blk_mq_hw_ctx *hctx)
@@ -258,7 +265,20 @@ static inline void blk_mq_put_driver_tag(struct request *rq)
__blk_mq_put_driver_tag(rq->mq_hctx, rq);
}
-bool blk_mq_get_driver_tag(struct request *rq);
+bool __blk_mq_get_driver_tag(struct blk_mq_hw_ctx *hctx, struct request *rq);
+
+static inline bool blk_mq_get_driver_tag(struct request *rq)
+{
+ struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
+
+ if (rq->tag != BLK_MQ_NO_TAG &&
+ !(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) {
+ hctx->tags->rqs[rq->tag] = rq;
+ return true;
+ }
+
+ return __blk_mq_get_driver_tag(hctx, rq);
+}
static inline void blk_mq_clear_mq_map(struct blk_mq_queue_map *qmap)
{
diff --git a/block/blk-settings.c b/block/blk-settings.c
index a7c857ad7d10..b880c70e22e4 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -842,6 +842,24 @@ bool blk_queue_can_use_dma_map_merging(struct request_queue *q,
}
EXPORT_SYMBOL_GPL(blk_queue_can_use_dma_map_merging);
+static bool disk_has_partitions(struct gendisk *disk)
+{
+ unsigned long idx;
+ struct block_device *part;
+ bool ret = false;
+
+ rcu_read_lock();
+ xa_for_each(&disk->part_tbl, idx, part) {
+ if (bdev_is_partition(part)) {
+ ret = true;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ return ret;
+}
+
/**
* blk_queue_set_zoned - configure a disk queue zoned model.
* @disk: the gendisk of the queue to configure
@@ -876,7 +894,7 @@ void blk_queue_set_zoned(struct gendisk *disk, enum blk_zoned_model model)
* we do nothing special as far as the block layer is concerned.
*/
if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED) ||
- !xa_empty(&disk->part_tbl))
+ disk_has_partitions(disk))
model = BLK_ZONED_NONE;
break;
case BLK_ZONED_NONE:
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 36f14d658e81..cef1f713370b 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -873,16 +873,15 @@ int blk_register_queue(struct gendisk *disk)
}
mutex_lock(&q->sysfs_lock);
+
+ ret = disk_register_independent_access_ranges(disk, NULL);
+ if (ret)
+ goto put_dev;
+
if (q->elevator) {
ret = elv_register_queue(q, false);
- if (ret) {
- mutex_unlock(&q->sysfs_lock);
- mutex_unlock(&q->sysfs_dir_lock);
- kobject_del(&q->kobj);
- blk_trace_remove_sysfs(dev);
- kobject_put(&dev->kobj);
- return ret;
- }
+ if (ret)
+ goto put_dev;
}
blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q);
@@ -914,6 +913,16 @@ unlock:
}
return ret;
+
+put_dev:
+ disk_unregister_independent_access_ranges(disk);
+ mutex_unlock(&q->sysfs_lock);
+ mutex_unlock(&q->sysfs_dir_lock);
+ kobject_del(&q->kobj);
+ blk_trace_remove_sysfs(dev);
+ kobject_put(&dev->kobj);
+
+ return ret;
}
/**
@@ -958,6 +967,7 @@ void blk_unregister_queue(struct gendisk *disk)
mutex_lock(&q->sysfs_lock);
if (q->elevator)
elv_unregister_queue(q);
+ disk_unregister_independent_access_ranges(disk);
mutex_unlock(&q->sysfs_lock);
mutex_unlock(&q->sysfs_dir_lock);
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 874c1c37bf0c..0c119be0e813 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -357,6 +357,9 @@ static void wb_timer_fn(struct blk_stat_callback *cb)
unsigned int inflight = wbt_inflight(rwb);
int status;
+ if (!rwb->rqos.q->disk)
+ return;
+
status = latency_exceeded(rwb, cb->stat);
trace_wbt_timer(rwb->rqos.q->disk->bdi, status, rqd->scale_step,
diff --git a/block/blk.h b/block/blk.h
index e80350327e6d..b4fed2033e48 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -55,6 +55,41 @@ void blk_free_flush_queue(struct blk_flush_queue *q);
void blk_freeze_queue(struct request_queue *q);
void __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic);
void blk_queue_start_drain(struct request_queue *q);
+int __bio_queue_enter(struct request_queue *q, struct bio *bio);
+bool submit_bio_checks(struct bio *bio);
+
+static inline bool blk_try_enter_queue(struct request_queue *q, bool pm)
+{
+ rcu_read_lock();
+ if (!percpu_ref_tryget_live_rcu(&q->q_usage_counter))
+ goto fail;
+
+ /*
+ * The code that increments the pm_only counter must ensure that the
+ * counter is globally visible before the queue is unfrozen.
+ */
+ if (blk_queue_pm_only(q) &&
+ (!pm || queue_rpm_status(q) == RPM_SUSPENDED))
+ goto fail_put;
+
+ rcu_read_unlock();
+ return true;
+
+fail_put:
+ blk_queue_exit(q);
+fail:
+ rcu_read_unlock();
+ return false;
+}
+
+static inline int bio_queue_enter(struct bio *bio)
+{
+ struct request_queue *q = bdev_get_queue(bio->bi_bdev);
+
+ if (blk_try_enter_queue(q, false))
+ return 0;
+ return __bio_queue_enter(q, bio);
+}
#define BIO_INLINE_VECS 4
struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs,
@@ -218,7 +253,7 @@ void blk_add_timer(struct request *req);
void blk_print_req_error(struct request *req, blk_status_t status);
bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
- unsigned int nr_segs, struct request **same_queue_rq);
+ unsigned int nr_segs, bool *same_queue_rq);
bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
struct bio *bio, unsigned int nr_segs);
@@ -236,7 +271,7 @@ void __blk_account_io_done(struct request *req, u64 now);
*/
#define ELV_ON_HASH(rq) ((rq)->rq_flags & RQF_HASHED)
-void blk_insert_flush(struct request *rq);
+bool blk_insert_flush(struct request *rq);
int elevator_switch_mq(struct request_queue *q,
struct elevator_type *new_e);
@@ -454,4 +489,8 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg);
extern const struct address_space_operations def_blk_aops;
+int disk_register_independent_access_ranges(struct gendisk *disk,
+ struct blk_independent_access_ranges *new_iars);
+void disk_unregister_independent_access_ranges(struct gendisk *disk);
+
#endif /* BLK_INTERNAL_H */
diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index ccb98276c964..10aa378702fa 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -31,6 +31,7 @@ static int bsg_transport_sg_io_fn(struct request_queue *q, struct sg_io_v4 *hdr,
struct bsg_job *job;
struct request *rq;
struct bio *bio;
+ void *reply;
int ret;
if (hdr->protocol != BSG_PROTOCOL_SCSI ||
@@ -39,22 +40,28 @@ static int bsg_transport_sg_io_fn(struct request_queue *q, struct sg_io_v4 *hdr,
if (!capable(CAP_SYS_RAWIO))
return -EPERM;
- rq = blk_get_request(q, hdr->dout_xfer_len ?
+ rq = blk_mq_alloc_request(q, hdr->dout_xfer_len ?
REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0);
if (IS_ERR(rq))
return PTR_ERR(rq);
rq->timeout = timeout;
job = blk_mq_rq_to_pdu(rq);
+ reply = job->reply;
+ memset(job, 0, sizeof(*job));
+ job->reply = reply;
+ job->reply_len = SCSI_SENSE_BUFFERSIZE;
+ job->dd_data = job + 1;
+
job->request_len = hdr->request_len;
job->request = memdup_user(uptr64(hdr->request), hdr->request_len);
if (IS_ERR(job->request)) {
ret = PTR_ERR(job->request);
- goto out_put_request;
+ goto out_free_rq;
}
if (hdr->dout_xfer_len && hdr->din_xfer_len) {
- job->bidi_rq = blk_get_request(rq->q, REQ_OP_DRV_IN, 0);
+ job->bidi_rq = blk_mq_alloc_request(rq->q, REQ_OP_DRV_IN, 0);
if (IS_ERR(job->bidi_rq)) {
ret = PTR_ERR(job->bidi_rq);
goto out_free_job_request;
@@ -134,11 +141,11 @@ out_unmap_bidi_rq:
blk_rq_unmap_user(job->bidi_bio);
out_free_bidi_rq:
if (job->bidi_rq)
- blk_put_request(job->bidi_rq);
+ blk_mq_free_request(job->bidi_rq);
out_free_job_request:
kfree(job->request);
-out_put_request:
- blk_put_request(rq);
+out_free_rq:
+ blk_mq_free_request(rq);
return ret;
}
@@ -302,18 +309,6 @@ static int bsg_init_rq(struct blk_mq_tag_set *set, struct request *req,
return 0;
}
-/* called right before the request is given to the request_queue user */
-static void bsg_initialize_rq(struct request *req)
-{
- struct bsg_job *job = blk_mq_rq_to_pdu(req);
- void *reply = job->reply;
-
- memset(job, 0, sizeof(*job));
- job->reply = reply;
- job->reply_len = SCSI_SENSE_BUFFERSIZE;
- job->dd_data = job + 1;
-}
-
static void bsg_exit_rq(struct blk_mq_tag_set *set, struct request *req,
unsigned int hctx_idx)
{
@@ -350,7 +345,6 @@ static const struct blk_mq_ops bsg_mq_ops = {
.queue_rq = bsg_queue_rq,
.init_request = bsg_init_rq,
.exit_request = bsg_exit_rq,
- .initialize_rq_fn = bsg_initialize_rq,
.complete = bsg_complete,
.timeout = bsg_timeout,
};
diff --git a/block/fops.c b/block/fops.c
index 2c43e493e37c..ad732a36f9b3 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -76,7 +76,7 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
bio_init(&bio, vecs, nr_pages);
bio_set_dev(&bio, bdev);
- bio.bi_iter.bi_sector = pos >> 9;
+ bio.bi_iter.bi_sector = pos >> SECTOR_SHIFT;
bio.bi_write_hint = iocb->ki_hint;
bio.bi_private = current;
bio.bi_end_io = blkdev_bio_end_io_simple;
@@ -124,9 +124,8 @@ out:
}
enum {
- DIO_MULTI_BIO = 1,
- DIO_SHOULD_DIRTY = 2,
- DIO_IS_SYNC = 4,
+ DIO_SHOULD_DIRTY = 1,
+ DIO_IS_SYNC = 2,
};
struct blkdev_dio {
@@ -137,7 +136,7 @@ struct blkdev_dio {
size_t size;
atomic_t ref;
unsigned int flags;
- struct bio bio;
+ struct bio bio ____cacheline_aligned_in_smp;
};
static struct bio_set blkdev_dio_pool;
@@ -150,7 +149,7 @@ static void blkdev_bio_end_io(struct bio *bio)
if (bio->bi_status && !dio->bio.bi_status)
dio->bio.bi_status = bio->bi_status;
- if (!(dio->flags & DIO_MULTI_BIO) || atomic_dec_and_test(&dio->ref)) {
+ if (atomic_dec_and_test(&dio->ref)) {
if (!(dio->flags & DIO_IS_SYNC)) {
struct kiocb *iocb = dio->iocb;
ssize_t ret;
@@ -164,9 +163,8 @@ static void blkdev_bio_end_io(struct bio *bio)
ret = blk_status_to_errno(dio->bio.bi_status);
}
- dio->iocb->ki_complete(iocb, ret, 0);
- if (dio->flags & DIO_MULTI_BIO)
- bio_put(&dio->bio);
+ dio->iocb->ki_complete(iocb, ret);
+ bio_put(&dio->bio);
} else {
struct task_struct *waiter = dio->waiter;
@@ -190,7 +188,6 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
struct blk_plug plug;
struct blkdev_dio *dio;
struct bio *bio;
- bool do_poll = (iocb->ki_flags & IOCB_HIPRI);
bool is_read = (iov_iter_rw(iter) == READ), is_sync;
loff_t pos = iocb->ki_pos;
int ret = 0;
@@ -202,11 +199,17 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
bio = bio_alloc_kiocb(iocb, nr_pages, &blkdev_dio_pool);
dio = container_of(bio, struct blkdev_dio, bio);
+ atomic_set(&dio->ref, 1);
+ /*
+ * Grab an extra reference to ensure the dio structure which is embedded
+ * into the first bio stays around.
+ */
+ bio_get(bio);
+
is_sync = is_sync_kiocb(iocb);
if (is_sync) {
dio->flags = DIO_IS_SYNC;
dio->waiter = current;
- bio_get(bio);
} else {
dio->flags = 0;
dio->iocb = iocb;
@@ -216,16 +219,11 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
if (is_read && iter_is_iovec(iter))
dio->flags |= DIO_SHOULD_DIRTY;
- /*
- * Don't plug for HIPRI/polled IO, as those should go straight
- * to issue
- */
- if (!(iocb->ki_flags & IOCB_HIPRI))
- blk_start_plug(&plug);
+ blk_start_plug(&plug);
for (;;) {
bio_set_dev(bio, bdev);
- bio->bi_iter.bi_sector = pos >> 9;
+ bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT;
bio->bi_write_hint = iocb->ki_hint;
bio->bi_private = dio;
bio->bi_end_io = blkdev_bio_end_io;
@@ -254,34 +252,15 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS);
if (!nr_pages) {
- if (do_poll)
- bio_set_polled(bio, iocb);
submit_bio(bio);
- if (do_poll)
- WRITE_ONCE(iocb->private, bio);
break;
}
- if (!(dio->flags & DIO_MULTI_BIO)) {
- /*
- * AIO needs an extra reference to ensure the dio
- * structure which is embedded into the first bio
- * stays around.
- */
- if (!is_sync)
- bio_get(bio);
- dio->flags |= DIO_MULTI_BIO;
- atomic_set(&dio->ref, 2);
- do_poll = false;
- } else {
- atomic_inc(&dio->ref);
- }
-
+ atomic_inc(&dio->ref);
submit_bio(bio);
bio = bio_alloc(GFP_KERNEL, nr_pages);
}
- if (!(iocb->ki_flags & IOCB_HIPRI))
- blk_finish_plug(&plug);
+ blk_finish_plug(&plug);
if (!is_sync)
return -EIOCBQUEUED;
@@ -290,9 +269,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
set_current_state(TASK_UNINTERRUPTIBLE);
if (!READ_ONCE(dio->waiter))
break;
-
- if (!do_poll || !bio_poll(bio, NULL, 0))
- blk_io_schedule();
+ blk_io_schedule();
}
__set_current_state(TASK_RUNNING);
@@ -305,6 +282,94 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
return ret;
}
+static void blkdev_bio_end_io_async(struct bio *bio)
+{
+ struct blkdev_dio *dio = container_of(bio, struct blkdev_dio, bio);
+ struct kiocb *iocb = dio->iocb;
+ ssize_t ret;
+
+ if (likely(!bio->bi_status)) {
+ ret = dio->size;
+ iocb->ki_pos += ret;
+ } else {
+ ret = blk_status_to_errno(bio->bi_status);
+ }
+
+ iocb->ki_complete(iocb, ret);
+
+ if (dio->flags & DIO_SHOULD_DIRTY) {
+ bio_check_pages_dirty(bio);
+ } else {
+ bio_release_pages(bio, false);
+ bio_put(bio);
+ }
+}
+
+static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
+ struct iov_iter *iter,
+ unsigned int nr_pages)
+{
+ struct block_device *bdev = iocb->ki_filp->private_data;
+ struct blkdev_dio *dio;
+ struct bio *bio;
+ loff_t pos = iocb->ki_pos;
+ int ret = 0;
+
+ if ((pos | iov_iter_alignment(iter)) &
+ (bdev_logical_block_size(bdev) - 1))
+ return -EINVAL;
+
+ bio = bio_alloc_kiocb(iocb, nr_pages, &blkdev_dio_pool);
+ dio = container_of(bio, struct blkdev_dio, bio);
+ dio->flags = 0;
+ dio->iocb = iocb;
+ bio_set_dev(bio, bdev);
+ bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT;
+ bio->bi_write_hint = iocb->ki_hint;
+ bio->bi_end_io = blkdev_bio_end_io_async;
+ bio->bi_ioprio = iocb->ki_ioprio;
+
+ if (iov_iter_is_bvec(iter)) {
+ /*
+ * Users don't rely on the iterator being in any particular
+ * state for async I/O returning -EIOCBQUEUED, hence we can
+ * avoid expensive iov_iter_advance(). Bypass
+ * bio_iov_iter_get_pages() and set the bvec directly.
+ */
+ bio_iov_bvec_set(bio, iter);
+ } else {
+ ret = bio_iov_iter_get_pages(bio, iter);
+ if (unlikely(ret)) {
+ bio->bi_status = BLK_STS_IOERR;
+ bio_endio(bio);
+ return ret;
+ }
+ }
+ dio->size = bio->bi_iter.bi_size;
+
+ if (iov_iter_rw(iter) == READ) {
+ bio->bi_opf = REQ_OP_READ;
+ if (iter_is_iovec(iter)) {
+ dio->flags |= DIO_SHOULD_DIRTY;
+ bio_set_pages_dirty(bio);
+ }
+ } else {
+ bio->bi_opf = dio_bio_write_op(iocb);
+ task_io_account_write(bio->bi_iter.bi_size);
+ }
+
+ if (iocb->ki_flags & IOCB_HIPRI) {
+ bio->bi_opf |= REQ_POLLED | REQ_NOWAIT;
+ submit_bio(bio);
+ WRITE_ONCE(iocb->private, bio);
+ } else {
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ bio->bi_opf |= REQ_NOWAIT;
+ submit_bio(bio);
+ }
+ return -EIOCBQUEUED;
+}
+
static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{
unsigned int nr_pages;
@@ -313,9 +378,11 @@ static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
return 0;
nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1);
- if (is_sync_kiocb(iocb) && nr_pages <= BIO_MAX_VECS)
- return __blkdev_direct_IO_simple(iocb, iter, nr_pages);
-
+ if (likely(nr_pages <= BIO_MAX_VECS)) {
+ if (is_sync_kiocb(iocb))
+ return __blkdev_direct_IO_simple(iocb, iter, nr_pages);
+ return __blkdev_direct_IO_async(iocb, iter, nr_pages);
+ }
return __blkdev_direct_IO(iocb, iter, bio_max_segs(nr_pages));
}
@@ -460,7 +527,7 @@ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct block_device *bdev = iocb->ki_filp->private_data;
struct inode *bd_inode = bdev->bd_inode;
- loff_t size = i_size_read(bd_inode);
+ loff_t size = bdev_nr_bytes(bdev);
struct blk_plug plug;
size_t shorted = 0;
ssize_t ret;
@@ -498,22 +565,25 @@ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct block_device *bdev = iocb->ki_filp->private_data;
- loff_t size = i_size_read(bdev->bd_inode);
+ loff_t size = bdev_nr_bytes(bdev);
loff_t pos = iocb->ki_pos;
size_t shorted = 0;
ssize_t ret;
- if (pos >= size)
- return 0;
-
- size -= pos;
- if (iov_iter_count(to) > size) {
- shorted = iov_iter_count(to) - size;
- iov_iter_truncate(to, size);
+ if (unlikely(pos + iov_iter_count(to) > size)) {
+ if (pos >= size)
+ return 0;
+ size -= pos;
+ if (iov_iter_count(to) > size) {
+ shorted = iov_iter_count(to) - size;
+ iov_iter_truncate(to, size);
+ }
}
ret = generic_file_read_iter(iocb, to);
- iov_iter_reexpand(to, iov_iter_count(to) + shorted);
+
+ if (unlikely(shorted))
+ iov_iter_reexpand(to, iov_iter_count(to) + shorted);
return ret;
}
@@ -535,7 +605,7 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start,
return -EOPNOTSUPP;
/* Don't go off the end of the device. */
- isize = i_size_read(bdev->bd_inode);
+ isize = bdev_nr_bytes(bdev);
if (start >= isize)
return -EINVAL;
if (end >= isize) {
@@ -562,16 +632,18 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start,
switch (mode) {
case FALLOC_FL_ZERO_RANGE:
case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE:
- error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
- GFP_KERNEL, BLKDEV_ZERO_NOUNMAP);
+ error = blkdev_issue_zeroout(bdev, start >> SECTOR_SHIFT,
+ len >> SECTOR_SHIFT, GFP_KERNEL,
+ BLKDEV_ZERO_NOUNMAP);
break;
case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE:
- error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
- GFP_KERNEL, BLKDEV_ZERO_NOFALLBACK);
+ error = blkdev_issue_zeroout(bdev, start >> SECTOR_SHIFT,
+ len >> SECTOR_SHIFT, GFP_KERNEL,
+ BLKDEV_ZERO_NOFALLBACK);
break;
case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE:
- error = blkdev_issue_discard(bdev, start >> 9, len >> 9,
- GFP_KERNEL, 0);
+ error = blkdev_issue_discard(bdev, start >> SECTOR_SHIFT,
+ len >> SECTOR_SHIFT, GFP_KERNEL, 0);
break;
default:
error = -EOPNOTSUPP;
diff --git a/block/genhd.c b/block/genhd.c
index 13494ed79675..ca2fbab1d425 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -58,6 +58,7 @@ void set_capacity(struct gendisk *disk, sector_t sectors)
spin_lock(&bdev->bd_size_lock);
i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT);
+ bdev->bd_nr_sectors = sectors;
spin_unlock(&bdev->bd_size_lock);
}
EXPORT_SYMBOL(set_capacity);
@@ -471,11 +472,15 @@ int device_add_disk(struct device *parent, struct gendisk *disk,
disk->part0->bd_holder_dir =
kobject_create_and_add("holders", &ddev->kobj);
- if (!disk->part0->bd_holder_dir)
+ if (!disk->part0->bd_holder_dir) {
+ ret = -ENOMEM;
goto out_del_integrity;
+ }
disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
- if (!disk->slave_dir)
+ if (!disk->slave_dir) {
+ ret = -ENOMEM;
goto out_put_holder_dir;
+ }
ret = bd_register_pending_holders(disk);
if (ret < 0)
@@ -592,16 +597,6 @@ void del_gendisk(struct gendisk *disk)
* Prevent new I/O from crossing bio_queue_enter().
*/
blk_queue_start_drain(q);
- blk_mq_freeze_queue_wait(q);
-
- rq_qos_exit(q);
- blk_sync_queue(q);
- blk_flush_integrity();
- /*
- * Allow using passthrough request again after the queue is torn down.
- */
- blk_queue_flag_clear(QUEUE_FLAG_INIT_DONE, q);
- __blk_mq_unfreeze_queue(q, true);
if (!(disk->flags & GENHD_FL_HIDDEN)) {
sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
@@ -624,9 +619,41 @@ void del_gendisk(struct gendisk *disk)
sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
pm_runtime_set_memalloc_noio(disk_to_dev(disk), false);
device_del(disk_to_dev(disk));
+
+ blk_mq_freeze_queue_wait(q);
+
+ rq_qos_exit(q);
+ blk_sync_queue(q);
+ blk_flush_integrity();
+ /*
+ * Allow using passthrough request again after the queue is torn down.
+ */
+ blk_queue_flag_clear(QUEUE_FLAG_INIT_DONE, q);
+ __blk_mq_unfreeze_queue(q, true);
+
}
EXPORT_SYMBOL(del_gendisk);
+/**
+ * invalidate_disk - invalidate the disk
+ * @disk: the struct gendisk to invalidate
+ *
+ * A helper to invalidates the disk. It will clean the disk's associated
+ * buffer/page caches and reset its internal states so that the disk
+ * can be reused by the drivers.
+ *
+ * Context: can sleep
+ */
+void invalidate_disk(struct gendisk *disk)
+{
+ struct block_device *bdev = disk->part0;
+
+ invalidate_bdev(bdev);
+ bdev->bd_inode->i_mapping->wb_err = 0;
+ set_capacity(disk, 0);
+}
+EXPORT_SYMBOL(invalidate_disk);
+
/* sysfs access to bad-blocks list. */
static ssize_t disk_badblocks_show(struct device *dev,
struct device_attribute *attr,
@@ -1392,12 +1419,6 @@ void set_disk_ro(struct gendisk *disk, bool read_only)
}
EXPORT_SYMBOL(set_disk_ro);
-int bdev_read_only(struct block_device *bdev)
-{
- return bdev->bd_read_only || get_disk_ro(bdev->bd_disk);
-}
-EXPORT_SYMBOL(bdev_read_only);
-
void inc_diskseq(struct gendisk *disk)
{
disk->diskseq = atomic64_inc_return(&diskseq);
diff --git a/block/ioctl.c b/block/ioctl.c
index 77b1b2453f39..d6af0ac97e57 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -132,7 +132,7 @@ static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
if (len & 511)
return -EINVAL;
- if (start + len > i_size_read(bdev->bd_inode))
+ if (start + len > bdev_nr_bytes(bdev))
return -EINVAL;
err = truncate_bdev_range(bdev, mode, start, start + len - 1);
@@ -164,7 +164,7 @@ static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode,
return -EINVAL;
if (len & 511)
return -EINVAL;
- if (end >= (uint64_t)i_size_read(bdev->bd_inode))
+ if (end >= (uint64_t)bdev_nr_bytes(bdev))
return -EINVAL;
if (end < start)
return -EINVAL;
@@ -543,7 +543,6 @@ long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
struct block_device *bdev = I_BDEV(file->f_mapping->host);
void __user *argp = (void __user *)arg;
fmode_t mode = file->f_mode;
- loff_t size;
int ret;
/*
@@ -570,10 +569,9 @@ long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
return put_long(argp,
(bdev->bd_disk->bdi->ra_pages * PAGE_SIZE) / 512);
case BLKGETSIZE:
- size = i_size_read(bdev->bd_inode);
- if ((size >> 9) > ~0UL)
+ if (bdev_nr_sectors(bdev) > ~0UL)
return -EFBIG;
- return put_ulong(argp, size >> 9);
+ return put_ulong(argp, bdev_nr_sectors(bdev));
/* The data is compatible, but the command number is different */
case BLKBSZGET: /* get block device soft block size (cf. BLKSSZGET) */
@@ -581,7 +579,7 @@ long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
case BLKBSZSET:
return blkdev_bszset(bdev, mode, argp);
case BLKGETSIZE64:
- return put_u64(argp, i_size_read(bdev->bd_inode));
+ return put_u64(argp, bdev_nr_bytes(bdev));
/* Incompatible alignment on i386 */
case BLKTRACESETUP:
@@ -615,7 +613,6 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
struct block_device *bdev = I_BDEV(file->f_mapping->host);
struct gendisk *disk = bdev->bd_disk;
fmode_t mode = file->f_mode;
- loff_t size;
/*
* O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have
@@ -641,10 +638,9 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
return compat_put_long(argp,
(bdev->bd_disk->bdi->ra_pages * PAGE_SIZE) / 512);
case BLKGETSIZE:
- size = i_size_read(bdev->bd_inode);
- if ((size >> 9) > ~0UL)
+ if (bdev_nr_sectors(bdev) > ~0UL)
return -EFBIG;
- return compat_put_ulong(argp, size >> 9);
+ return compat_put_ulong(argp, bdev_nr_sectors(bdev));
/* The data is compatible, but the command number is different */
case BLKBSZGET_32: /* get the logical block size (cf. BLKSSZGET) */
@@ -652,7 +648,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
case BLKBSZSET_32:
return blkdev_bszset(bdev, mode, argp);
case BLKGETSIZE64_32:
- return put_u64(argp, i_size_read(bdev->bd_inode));
+ return put_u64(argp, bdev_nr_bytes(bdev));
/* Incompatible alignment on i386 */
case BLKTRACESETUP32:
diff --git a/block/keyslot-manager.c b/block/keyslot-manager.c
deleted file mode 100644
index 1792159d12d1..000000000000
--- a/block/keyslot-manager.c
+++ /dev/null
@@ -1,579 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright 2019 Google LLC
- */
-
-/**
- * DOC: The Keyslot Manager
- *
- * Many devices with inline encryption support have a limited number of "slots"
- * into which encryption contexts may be programmed, and requests can be tagged
- * with a slot number to specify the key to use for en/decryption.
- *
- * As the number of slots is limited, and programming keys is expensive on
- * many inline encryption hardware, we don't want to program the same key into
- * multiple slots - if multiple requests are using the same key, we want to
- * program just one slot with that key and use that slot for all requests.
- *
- * The keyslot manager manages these keyslots appropriately, and also acts as
- * an abstraction between the inline encryption hardware and the upper layers.
- *
- * Lower layer devices will set up a keyslot manager in their request queue
- * and tell it how to perform device specific operations like programming/
- * evicting keys from keyslots.
- *
- * Upper layers will call blk_ksm_get_slot_for_key() to program a
- * key into some slot in the inline encryption hardware.
- */
-
-#define pr_fmt(fmt) "blk-crypto: " fmt
-
-#include <linux/keyslot-manager.h>
-#include <linux/device.h>
-#include <linux/atomic.h>
-#include <linux/mutex.h>
-#include <linux/pm_runtime.h>
-#include <linux/wait.h>
-#include <linux/blkdev.h>
-#include <linux/blk-integrity.h>
-
-struct blk_ksm_keyslot {
- atomic_t slot_refs;
- struct list_head idle_slot_node;
- struct hlist_node hash_node;
- const struct blk_crypto_key *key;
- struct blk_keyslot_manager *ksm;
-};
-
-static inline void blk_ksm_hw_enter(struct blk_keyslot_manager *ksm)
-{
- /*
- * Calling into the driver requires ksm->lock held and the device
- * resumed. But we must resume the device first, since that can acquire
- * and release ksm->lock via blk_ksm_reprogram_all_keys().
- */
- if (ksm->dev)
- pm_runtime_get_sync(ksm->dev);
- down_write(&ksm->lock);
-}
-
-static inline void blk_ksm_hw_exit(struct blk_keyslot_manager *ksm)
-{
- up_write(&ksm->lock);
- if (ksm->dev)
- pm_runtime_put_sync(ksm->dev);
-}
-
-static inline bool blk_ksm_is_passthrough(struct blk_keyslot_manager *ksm)
-{
- return ksm->num_slots == 0;
-}
-
-/**
- * blk_ksm_init() - Initialize a keyslot manager
- * @ksm: The keyslot_manager to initialize.
- * @num_slots: The number of key slots to manage.
- *
- * Allocate memory for keyslots and initialize a keyslot manager. Called by
- * e.g. storage drivers to set up a keyslot manager in their request_queue.
- *
- * Return: 0 on success, or else a negative error code.
- */
-int blk_ksm_init(struct blk_keyslot_manager *ksm, unsigned int num_slots)
-{
- unsigned int slot;
- unsigned int i;
- unsigned int slot_hashtable_size;
-
- memset(ksm, 0, sizeof(*ksm));
-
- if (num_slots == 0)
- return -EINVAL;
-
- ksm->slots = kvcalloc(num_slots, sizeof(ksm->slots[0]), GFP_KERNEL);
- if (!ksm->slots)
- return -ENOMEM;
-
- ksm->num_slots = num_slots;
-
- init_rwsem(&ksm->lock);
-
- init_waitqueue_head(&ksm->idle_slots_wait_queue);
- INIT_LIST_HEAD(&ksm->idle_slots);
-
- for (slot = 0; slot < num_slots; slot++) {
- ksm->slots[slot].ksm = ksm;
- list_add_tail(&ksm->slots[slot].idle_slot_node,
- &ksm->idle_slots);
- }
-
- spin_lock_init(&ksm->idle_slots_lock);
-
- slot_hashtable_size = roundup_pow_of_two(num_slots);
- /*
- * hash_ptr() assumes bits != 0, so ensure the hash table has at least 2
- * buckets. This only makes a difference when there is only 1 keyslot.
- */
- if (slot_hashtable_size < 2)
- slot_hashtable_size = 2;
-
- ksm->log_slot_ht_size = ilog2(slot_hashtable_size);
- ksm->slot_hashtable = kvmalloc_array(slot_hashtable_size,
- sizeof(ksm->slot_hashtable[0]),
- GFP_KERNEL);
- if (!ksm->slot_hashtable)
- goto err_destroy_ksm;
- for (i = 0; i < slot_hashtable_size; i++)
- INIT_HLIST_HEAD(&ksm->slot_hashtable[i]);
-
- return 0;
-
-err_destroy_ksm:
- blk_ksm_destroy(ksm);
- return -ENOMEM;
-}
-EXPORT_SYMBOL_GPL(blk_ksm_init);
-
-static void blk_ksm_destroy_callback(void *ksm)
-{
- blk_ksm_destroy(ksm);
-}
-
-/**
- * devm_blk_ksm_init() - Resource-managed blk_ksm_init()
- * @dev: The device which owns the blk_keyslot_manager.
- * @ksm: The blk_keyslot_manager to initialize.
- * @num_slots: The number of key slots to manage.
- *
- * Like blk_ksm_init(), but causes blk_ksm_destroy() to be called automatically
- * on driver detach.
- *
- * Return: 0 on success, or else a negative error code.
- */
-int devm_blk_ksm_init(struct device *dev, struct blk_keyslot_manager *ksm,
- unsigned int num_slots)
-{
- int err = blk_ksm_init(ksm, num_slots);
-
- if (err)
- return err;
-
- return devm_add_action_or_reset(dev, blk_ksm_destroy_callback, ksm);
-}
-EXPORT_SYMBOL_GPL(devm_blk_ksm_init);
-
-static inline struct hlist_head *
-blk_ksm_hash_bucket_for_key(struct blk_keyslot_manager *ksm,
- const struct blk_crypto_key *key)
-{
- return &ksm->slot_hashtable[hash_ptr(key, ksm->log_slot_ht_size)];
-}
-
-static void blk_ksm_remove_slot_from_lru_list(struct blk_ksm_keyslot *slot)
-{
- struct blk_keyslot_manager *ksm = slot->ksm;
- unsigned long flags;
-
- spin_lock_irqsave(&ksm->idle_slots_lock, flags);
- list_del(&slot->idle_slot_node);
- spin_unlock_irqrestore(&ksm->idle_slots_lock, flags);
-}
-
-static struct blk_ksm_keyslot *blk_ksm_find_keyslot(
- struct blk_keyslot_manager *ksm,
- const struct blk_crypto_key *key)
-{
- const struct hlist_head *head = blk_ksm_hash_bucket_for_key(ksm, key);
- struct blk_ksm_keyslot *slotp;
-
- hlist_for_each_entry(slotp, head, hash_node) {
- if (slotp->key == key)
- return slotp;
- }
- return NULL;
-}
-
-static struct blk_ksm_keyslot *blk_ksm_find_and_grab_keyslot(
- struct blk_keyslot_manager *ksm,
- const struct blk_crypto_key *key)
-{
- struct blk_ksm_keyslot *slot;
-
- slot = blk_ksm_find_keyslot(ksm, key);
- if (!slot)
- return NULL;
- if (atomic_inc_return(&slot->slot_refs) == 1) {
- /* Took first reference to this slot; remove it from LRU list */
- blk_ksm_remove_slot_from_lru_list(slot);
- }
- return slot;
-}
-
-unsigned int blk_ksm_get_slot_idx(struct blk_ksm_keyslot *slot)
-{
- return slot - slot->ksm->slots;
-}
-EXPORT_SYMBOL_GPL(blk_ksm_get_slot_idx);
-
-/**
- * blk_ksm_get_slot_for_key() - Program a key into a keyslot.
- * @ksm: The keyslot manager to program the key into.
- * @key: Pointer to the key object to program, including the raw key, crypto
- * mode, and data unit size.
- * @slot_ptr: A pointer to return the pointer of the allocated keyslot.
- *
- * Get a keyslot that's been programmed with the specified key. If one already
- * exists, return it with incremented refcount. Otherwise, wait for a keyslot
- * to become idle and program it.
- *
- * Context: Process context. Takes and releases ksm->lock.
- * Return: BLK_STS_OK on success (and keyslot is set to the pointer of the
- * allocated keyslot), or some other blk_status_t otherwise (and
- * keyslot is set to NULL).
- */
-blk_status_t blk_ksm_get_slot_for_key(struct blk_keyslot_manager *ksm,
- const struct blk_crypto_key *key,
- struct blk_ksm_keyslot **slot_ptr)
-{
- struct blk_ksm_keyslot *slot;
- int slot_idx;
- int err;
-
- *slot_ptr = NULL;
-
- if (blk_ksm_is_passthrough(ksm))
- return BLK_STS_OK;
-
- down_read(&ksm->lock);
- slot = blk_ksm_find_and_grab_keyslot(ksm, key);
- up_read(&ksm->lock);
- if (slot)
- goto success;
-
- for (;;) {
- blk_ksm_hw_enter(ksm);
- slot = blk_ksm_find_and_grab_keyslot(ksm, key);
- if (slot) {
- blk_ksm_hw_exit(ksm);
- goto success;
- }
-
- /*
- * If we're here, that means there wasn't a slot that was
- * already programmed with the key. So try to program it.
- */
- if (!list_empty(&ksm->idle_slots))
- break;
-
- blk_ksm_hw_exit(ksm);
- wait_event(ksm->idle_slots_wait_queue,
- !list_empty(&ksm->idle_slots));
- }
-
- slot = list_first_entry(&ksm->idle_slots, struct blk_ksm_keyslot,
- idle_slot_node);
- slot_idx = blk_ksm_get_slot_idx(slot);
-
- err = ksm->ksm_ll_ops.keyslot_program(ksm, key, slot_idx);
- if (err) {
- wake_up(&ksm->idle_slots_wait_queue);
- blk_ksm_hw_exit(ksm);
- return errno_to_blk_status(err);
- }
-
- /* Move this slot to the hash list for the new key. */
- if (slot->key)
- hlist_del(&slot->hash_node);
- slot->key = key;
- hlist_add_head(&slot->hash_node, blk_ksm_hash_bucket_for_key(ksm, key));
-
- atomic_set(&slot->slot_refs, 1);
-
- blk_ksm_remove_slot_from_lru_list(slot);
-
- blk_ksm_hw_exit(ksm);
-success:
- *slot_ptr = slot;
- return BLK_STS_OK;
-}
-
-/**
- * blk_ksm_put_slot() - Release a reference to a slot
- * @slot: The keyslot to release the reference of.
- *
- * Context: Any context.
- */
-void blk_ksm_put_slot(struct blk_ksm_keyslot *slot)
-{
- struct blk_keyslot_manager *ksm;
- unsigned long flags;
-
- if (!slot)
- return;
-
- ksm = slot->ksm;
-
- if (atomic_dec_and_lock_irqsave(&slot->slot_refs,
- &ksm->idle_slots_lock, flags)) {
- list_add_tail(&slot->idle_slot_node, &ksm->idle_slots);
- spin_unlock_irqrestore(&ksm->idle_slots_lock, flags);
- wake_up(&ksm->idle_slots_wait_queue);
- }
-}
-
-/**
- * blk_ksm_crypto_cfg_supported() - Find out if a crypto configuration is
- * supported by a ksm.
- * @ksm: The keyslot manager to check
- * @cfg: The crypto configuration to check for.
- *
- * Checks for crypto_mode/data unit size/dun bytes support.
- *
- * Return: Whether or not this ksm supports the specified crypto config.
- */
-bool blk_ksm_crypto_cfg_supported(struct blk_keyslot_manager *ksm,
- const struct blk_crypto_config *cfg)
-{
- if (!ksm)
- return false;
- if (!(ksm->crypto_modes_supported[cfg->crypto_mode] &
- cfg->data_unit_size))
- return false;
- if (ksm->max_dun_bytes_supported < cfg->dun_bytes)
- return false;
- return true;
-}
-
-/**
- * blk_ksm_evict_key() - Evict a key from the lower layer device.
- * @ksm: The keyslot manager to evict from
- * @key: The key to evict
- *
- * Find the keyslot that the specified key was programmed into, and evict that
- * slot from the lower layer device. The slot must not be in use by any
- * in-flight IO when this function is called.
- *
- * Context: Process context. Takes and releases ksm->lock.
- * Return: 0 on success or if there's no keyslot with the specified key, -EBUSY
- * if the keyslot is still in use, or another -errno value on other
- * error.
- */
-int blk_ksm_evict_key(struct blk_keyslot_manager *ksm,
- const struct blk_crypto_key *key)
-{
- struct blk_ksm_keyslot *slot;
- int err = 0;
-
- if (blk_ksm_is_passthrough(ksm)) {
- if (ksm->ksm_ll_ops.keyslot_evict) {
- blk_ksm_hw_enter(ksm);
- err = ksm->ksm_ll_ops.keyslot_evict(ksm, key, -1);
- blk_ksm_hw_exit(ksm);
- return err;
- }
- return 0;
- }
-
- blk_ksm_hw_enter(ksm);
- slot = blk_ksm_find_keyslot(ksm, key);
- if (!slot)
- goto out_unlock;
-
- if (WARN_ON_ONCE(atomic_read(&slot->slot_refs) != 0)) {
- err = -EBUSY;
- goto out_unlock;
- }
- err = ksm->ksm_ll_ops.keyslot_evict(ksm, key,
- blk_ksm_get_slot_idx(slot));
- if (err)
- goto out_unlock;
-
- hlist_del(&slot->hash_node);
- slot->key = NULL;
- err = 0;
-out_unlock:
- blk_ksm_hw_exit(ksm);
- return err;
-}
-
-/**
- * blk_ksm_reprogram_all_keys() - Re-program all keyslots.
- * @ksm: The keyslot manager
- *
- * Re-program all keyslots that are supposed to have a key programmed. This is
- * intended only for use by drivers for hardware that loses its keys on reset.
- *
- * Context: Process context. Takes and releases ksm->lock.
- */
-void blk_ksm_reprogram_all_keys(struct blk_keyslot_manager *ksm)
-{
- unsigned int slot;
-
- if (blk_ksm_is_passthrough(ksm))
- return;
-
- /* This is for device initialization, so don't resume the device */
- down_write(&ksm->lock);
- for (slot = 0; slot < ksm->num_slots; slot++) {
- const struct blk_crypto_key *key = ksm->slots[slot].key;
- int err;
-
- if (!key)
- continue;
-
- err = ksm->ksm_ll_ops.keyslot_program(ksm, key, slot);
- WARN_ON(err);
- }
- up_write(&ksm->lock);
-}
-EXPORT_SYMBOL_GPL(blk_ksm_reprogram_all_keys);
-
-void blk_ksm_destroy(struct blk_keyslot_manager *ksm)
-{
- if (!ksm)
- return;
- kvfree(ksm->slot_hashtable);
- kvfree_sensitive(ksm->slots, sizeof(ksm->slots[0]) * ksm->num_slots);
- memzero_explicit(ksm, sizeof(*ksm));
-}
-EXPORT_SYMBOL_GPL(blk_ksm_destroy);
-
-bool blk_ksm_register(struct blk_keyslot_manager *ksm, struct request_queue *q)
-{
- if (blk_integrity_queue_supports_integrity(q)) {
- pr_warn("Integrity and hardware inline encryption are not supported together. Disabling hardware inline encryption.\n");
- return false;
- }
- q->ksm = ksm;
- return true;
-}
-EXPORT_SYMBOL_GPL(blk_ksm_register);
-
-void blk_ksm_unregister(struct request_queue *q)
-{
- q->ksm = NULL;
-}
-
-/**
- * blk_ksm_intersect_modes() - restrict supported modes by child device
- * @parent: The keyslot manager for parent device
- * @child: The keyslot manager for child device, or NULL
- *
- * Clear any crypto mode support bits in @parent that aren't set in @child.
- * If @child is NULL, then all parent bits are cleared.
- *
- * Only use this when setting up the keyslot manager for a layered device,
- * before it's been exposed yet.
- */
-void blk_ksm_intersect_modes(struct blk_keyslot_manager *parent,
- const struct blk_keyslot_manager *child)
-{
- if (child) {
- unsigned int i;
-
- parent->max_dun_bytes_supported =
- min(parent->max_dun_bytes_supported,
- child->max_dun_bytes_supported);
- for (i = 0; i < ARRAY_SIZE(child->crypto_modes_supported);
- i++) {
- parent->crypto_modes_supported[i] &=
- child->crypto_modes_supported[i];
- }
- } else {
- parent->max_dun_bytes_supported = 0;
- memset(parent->crypto_modes_supported, 0,
- sizeof(parent->crypto_modes_supported));
- }
-}
-EXPORT_SYMBOL_GPL(blk_ksm_intersect_modes);
-
-/**
- * blk_ksm_is_superset() - Check if a KSM supports a superset of crypto modes
- * and DUN bytes that another KSM supports. Here,
- * "superset" refers to the mathematical meaning of the
- * word - i.e. if two KSMs have the *same* capabilities,
- * they *are* considered supersets of each other.
- * @ksm_superset: The KSM that we want to verify is a superset
- * @ksm_subset: The KSM that we want to verify is a subset
- *
- * Return: True if @ksm_superset supports a superset of the crypto modes and DUN
- * bytes that @ksm_subset supports.
- */
-bool blk_ksm_is_superset(struct blk_keyslot_manager *ksm_superset,
- struct blk_keyslot_manager *ksm_subset)
-{
- int i;
-
- if (!ksm_subset)
- return true;
-
- if (!ksm_superset)
- return false;
-
- for (i = 0; i < ARRAY_SIZE(ksm_superset->crypto_modes_supported); i++) {
- if (ksm_subset->crypto_modes_supported[i] &
- (~ksm_superset->crypto_modes_supported[i])) {
- return false;
- }
- }
-
- if (ksm_subset->max_dun_bytes_supported >
- ksm_superset->max_dun_bytes_supported) {
- return false;
- }
-
- return true;
-}
-EXPORT_SYMBOL_GPL(blk_ksm_is_superset);
-
-/**
- * blk_ksm_update_capabilities() - Update the restrictions of a KSM to those of
- * another KSM
- * @target_ksm: The KSM whose restrictions to update.
- * @reference_ksm: The KSM to whose restrictions this function will update
- * @target_ksm's restrictions to.
- *
- * Blk-crypto requires that crypto capabilities that were
- * advertised when a bio was created continue to be supported by the
- * device until that bio is ended. This is turn means that a device cannot
- * shrink its advertised crypto capabilities without any explicit
- * synchronization with upper layers. So if there's no such explicit
- * synchronization, @reference_ksm must support all the crypto capabilities that
- * @target_ksm does
- * (i.e. we need blk_ksm_is_superset(@reference_ksm, @target_ksm) == true).
- *
- * Note also that as long as the crypto capabilities are being expanded, the
- * order of updates becoming visible is not important because it's alright
- * for blk-crypto to see stale values - they only cause blk-crypto to
- * believe that a crypto capability isn't supported when it actually is (which
- * might result in blk-crypto-fallback being used if available, or the bio being
- * failed).
- */
-void blk_ksm_update_capabilities(struct blk_keyslot_manager *target_ksm,
- struct blk_keyslot_manager *reference_ksm)
-{
- memcpy(target_ksm->crypto_modes_supported,
- reference_ksm->crypto_modes_supported,
- sizeof(target_ksm->crypto_modes_supported));
-
- target_ksm->max_dun_bytes_supported =
- reference_ksm->max_dun_bytes_supported;
-}
-EXPORT_SYMBOL_GPL(blk_ksm_update_capabilities);
-
-/**
- * blk_ksm_init_passthrough() - Init a passthrough keyslot manager
- * @ksm: The keyslot manager to init
- *
- * Initialize a passthrough keyslot manager.
- * Called by e.g. storage drivers to set up a keyslot manager in their
- * request_queue, when the storage driver wants to manage its keys by itself.
- * This is useful for inline encryption hardware that doesn't have the concept
- * of keyslots, and for layered devices.
- */
-void blk_ksm_init_passthrough(struct blk_keyslot_manager *ksm)
-{
- memset(ksm, 0, sizeof(*ksm));
- init_rwsem(&ksm->lock);
-}
-EXPORT_SYMBOL_GPL(blk_ksm_init_passthrough);
diff --git a/block/partitions/core.c b/block/partitions/core.c
index 9dbddc355b40..334b72ef1d73 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -91,6 +91,7 @@ static void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors)
{
spin_lock(&bdev->bd_size_lock);
i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT);
+ bdev->bd_nr_sectors = sectors;
spin_unlock(&bdev->bd_size_lock);
}
@@ -424,6 +425,7 @@ out_del:
device_del(pdev);
out_put:
put_device(pdev);
+ return ERR_PTR(err);
out_put_disk:
put_disk(disk);
return ERR_PTR(err);
diff --git a/block/partitions/efi.c b/block/partitions/efi.c
index 7ca5c4c374d4..5e9be13a56a8 100644
--- a/block/partitions/efi.c
+++ b/block/partitions/efi.c
@@ -133,7 +133,7 @@ efi_crc32(const void *buf, unsigned long len)
*/
static u64 last_lba(struct gendisk *disk)
{
- return div_u64(disk->part0->bd_inode->i_size,
+ return div_u64(bdev_nr_bytes(disk->part0),
queue_logical_block_size(disk->queue)) - 1ULL;
}
diff --git a/block/partitions/ibm.c b/block/partitions/ibm.c
index 9bca396aef4a..403756dbd50d 100644
--- a/block/partitions/ibm.c
+++ b/block/partitions/ibm.c
@@ -198,7 +198,7 @@ static int find_lnx1_partitions(struct parsed_partitions *state,
char name[],
union label_t *label,
sector_t labelsect,
- loff_t i_size,
+ sector_t nr_sectors,
dasd_information2_t *info)
{
loff_t offset, geo_size, size;
@@ -213,14 +213,14 @@ static int find_lnx1_partitions(struct parsed_partitions *state,
} else {
/*
* Formated w/o large volume support. If the sanity check
- * 'size based on geo == size based on i_size' is true, then
+ * 'size based on geo == size based on nr_sectors' is true, then
* we can safely assume that we know the formatted size of
* the disk, otherwise we need additional information
* that we can only get from a real DASD device.
*/
geo_size = geo->cylinders * geo->heads
* geo->sectors * secperblk;
- size = i_size >> 9;
+ size = nr_sectors;
if (size != geo_size) {
if (!info) {
strlcat(state->pp_buf, "\n", PAGE_SIZE);
@@ -229,7 +229,7 @@ static int find_lnx1_partitions(struct parsed_partitions *state,
if (!strcmp(info->type, "ECKD"))
if (geo_size < size)
size = geo_size;
- /* else keep size based on i_size */
+ /* else keep size based on nr_sectors */
}
}
/* first and only partition starts in the first block after the label */
@@ -293,7 +293,8 @@ int ibm_partition(struct parsed_partitions *state)
struct gendisk *disk = state->disk;
struct block_device *bdev = disk->part0;
int blocksize, res;
- loff_t i_size, offset, size;
+ loff_t offset, size;
+ sector_t nr_sectors;
dasd_information2_t *info;
struct hd_geometry *geo;
char type[5] = {0,};
@@ -308,8 +309,8 @@ int ibm_partition(struct parsed_partitions *state)
blocksize = bdev_logical_block_size(bdev);
if (blocksize <= 0)
goto out_symbol;
- i_size = i_size_read(bdev->bd_inode);
- if (i_size == 0)
+ nr_sectors = bdev_nr_sectors(bdev);
+ if (nr_sectors == 0)
goto out_symbol;
info = kmalloc(sizeof(dasd_information2_t), GFP_KERNEL);
if (info == NULL)
@@ -336,7 +337,7 @@ int ibm_partition(struct parsed_partitions *state)
label);
} else if (!strncmp(type, "LNX1", 4)) {
res = find_lnx1_partitions(state, geo, blocksize, name,
- label, labelsect, i_size,
+ label, labelsect, nr_sectors,
info);
} else if (!strncmp(type, "CMS1", 4)) {
res = find_cms1_partitions(state, geo, blocksize, name,
@@ -353,7 +354,7 @@ int ibm_partition(struct parsed_partitions *state)
res = 1;
if (info->format == DASD_FORMAT_LDL) {
strlcat(state->pp_buf, "(nonl)", PAGE_SIZE);
- size = i_size >> 9;
+ size = nr_sectors;
offset = (info->label_block + 1) * (blocksize >> 9);
put_partition(state, 1, offset, size-offset);
strlcat(state->pp_buf, "\n", PAGE_SIZE);