author		Jens Axboe <axboe@kernel.dk>	2019-11-07 06:33:05 -0700
committer	Jens Axboe <axboe@kernel.dk>	2019-11-07 06:33:05 -0700
commit		439b84fa17e96f6d171ad691370b110bf76adc2c (patch)
tree		90f905efbc8712a196ab9423e91f977207b8a6c7
parent		64fab7290dc3561729bbc1e35895a517eb2e549e (diff)
parent		e876df1fe0ad1b191284ee6ed2db7960bd322d00 (diff)
Merge branch 'for-5.5/block' into for-5.5/drivers
Pull in dependencies for the new zoned open/close/finish support.

* for-5.5/block: (32 commits)
  block: add zone open, close and finish ioctl support
  block: add zone open, close and finish operations
  block: Simplify REQ_OP_ZONE_RESET_ALL handling
  block: Remove REQ_OP_ZONE_RESET plugging
  block: Warn if elevator= parameter is used
  block: avoid blk_bio_segment_split for small I/O operations
  blk-mq: make sure that line break can be printed
  block: sed-opal: Introduce Opal Datastore UID
  block: sed-opal: Add support to read/write opal tables generically
  block: sed-opal: Generalizing write data to any opal table
  bdev: Refresh bdev size for disks without partitioning
  bdev: Factor out bdev revalidation into a common helper
  blk-mq: avoid sysfs buffer overflow with too many CPU cores
  blk-mq: Make blk_mq_run_hw_queue() return void
  fcntl: fix typo in RWH_WRITE_LIFE_NOT_SET r/w hint name
  blk-mq: fill header with kernel-doc
  blk-mq: remove needless goto from blk_mq_get_driver_tag
  block: reorder bio::__bi_remaining for better packing
  block: Reduce the amount of memory used for tag sets
  block: Reduce the amount of memory required per request queue
  ...
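The new ioctls reuse the existing struct blk_zone_range argument of BLKRESETZONE, so issuing one from userspace is a single ioctl() call. A minimal sketch, assuming a zoned device at /dev/nvme0n1 with 256 MiB zones (both the path and the zone size are illustrative, not part of this series):

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/blkzoned.h>

	/* Explicitly open the first zone with the new BLKOPENZONE ioctl. */
	int main(void)
	{
		struct blk_zone_range range = {
			.sector = 0,		/* start of the first zone */
			.nr_sectors = 524288,	/* one 256 MiB zone, in 512 B sectors */
		};
		int fd = open("/dev/nvme0n1", O_RDWR);

		if (fd < 0 || ioctl(fd, BLKOPENZONE, &range) < 0) {
			perror("BLKOPENZONE");
			return 1;
		}
		return 0;
	}

BLKCLOSEZONE and BLKFINISHZONE take the same argument, and the sector range must be zone aligned, as documented in the blkzoned.h hunk below.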
-rw-r--r--	block/blk-core.c	16
-rw-r--r--	block/blk-exec.c	2
-rw-r--r--	block/blk-merge.c	16
-rw-r--r--	block/blk-mq-sysfs.c	31
-rw-r--r--	block/blk-mq.c	130
-rw-r--r--	block/blk-mq.h	9
-rw-r--r--	block/blk-stat.c	7
-rw-r--r--	block/blk-sysfs.c	8
-rw-r--r--	block/blk-zoned.c	99
-rw-r--r--	block/blk.h	7
-rw-r--r--	block/elevator.c	9
-rw-r--r--	block/ioctl.c	5
-rw-r--r--	block/opal_proto.h	2
-rw-r--r--	block/sed-opal.c	312
-rw-r--r--	block/t10-pi.c	8
-rw-r--r--	drivers/md/dm-zoned-metadata.c	6
-rw-r--r--	fs/block_dev.c	37
-rw-r--r--	fs/f2fs/segment.c	3
-rw-r--r--	fs/fcntl.c	2
-rw-r--r--	include/linux/blk-mq.h	299
-rw-r--r--	include/linux/blk_types.h	27
-rw-r--r--	include/linux/blkdev.h	16
-rw-r--r--	include/linux/sed-opal.h	1
-rw-r--r--	include/trace/events/wbt.h	12
-rw-r--r--	include/uapi/linux/blkzoned.h	17
-rw-r--r--	include/uapi/linux/fcntl.h	9
-rw-r--r--	include/uapi/linux/sed-opal.h	20
-rw-r--r--	tools/include/uapi/linux/fcntl.h	9
28 files changed, 788 insertions, 331 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index d5e668ec751b..f0d82227a2fc 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -132,6 +132,9 @@ static const char *const blk_op_name[] = {
REQ_OP_NAME(SECURE_ERASE),
REQ_OP_NAME(ZONE_RESET),
REQ_OP_NAME(ZONE_RESET_ALL),
+ REQ_OP_NAME(ZONE_OPEN),
+ REQ_OP_NAME(ZONE_CLOSE),
+ REQ_OP_NAME(ZONE_FINISH),
REQ_OP_NAME(WRITE_SAME),
REQ_OP_NAME(WRITE_ZEROES),
REQ_OP_NAME(SCSI_IN),
@@ -336,14 +339,14 @@ EXPORT_SYMBOL_GPL(blk_set_queue_dying);
*/
void blk_cleanup_queue(struct request_queue *q)
{
+ WARN_ON_ONCE(blk_queue_registered(q));
+
/* mark @q DYING, no new request or merges will be allowed afterwards */
- mutex_lock(&q->sysfs_lock);
blk_set_queue_dying(q);
blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q);
blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
blk_queue_flag_set(QUEUE_FLAG_DYING, q);
- mutex_unlock(&q->sysfs_lock);
/*
* Drain all requests queued before DYING marking. Set DEAD flag to
@@ -849,10 +852,10 @@ static inline int blk_partition_remap(struct bio *bio)
goto out;
/*
- * Zone reset does not include bi_size so bio_sectors() is always 0.
- * Include a test for the reset op code and perform the remap if needed.
+ * Zone management bios do not have a sector count but they do have
+ * a start sector filled out and need to be remapped.
*/
- if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET) {
+ if (bio_sectors(bio) || op_is_zone_mgmt(bio_op(bio))) {
if (bio_check_eod(bio, part_nr_sects_read(p)))
goto out;
bio->bi_iter.bi_sector += p->start_sect;
@@ -936,6 +939,9 @@ generic_make_request_checks(struct bio *bio)
goto not_supported;
break;
case REQ_OP_ZONE_RESET:
+ case REQ_OP_ZONE_OPEN:
+ case REQ_OP_ZONE_CLOSE:
+ case REQ_OP_ZONE_FINISH:
if (!blk_queue_is_zoned(q))
goto not_supported;
break;
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 1db44ca0f4a6..e20a852ae432 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -55,6 +55,8 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
rq->rq_disk = bd_disk;
rq->end_io = done;
+ blk_account_io_start(rq, true);
+
/*
* don't check dying flag for MQ because the request won't
* be reused after dying flag is set
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 48e6725b32ee..f22cb6251d06 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -293,7 +293,7 @@ split:
void __blk_queue_split(struct request_queue *q, struct bio **bio,
unsigned int *nr_segs)
{
- struct bio *split;
+ struct bio *split = NULL;
switch (bio_op(*bio)) {
case REQ_OP_DISCARD:
@@ -309,6 +309,20 @@ void __blk_queue_split(struct request_queue *q, struct bio **bio,
nr_segs);
break;
default:
+ /*
+ * All drivers must accept single-page bios that are <=
+ * PAGE_SIZE. This is a quick and dirty check that relies on
+ * the fact that bi_io_vec[0] is always valid if a bio has data.
+ * The check might lead to occasional false negatives when bios
+ * are cloned, but compared to the performance impact of cloned
+ * bios themselves the loop below doesn't matter anyway.
+ */
+ if (!q->limits.chunk_sectors &&
+ (*bio)->bi_vcnt == 1 &&
+ (*bio)->bi_io_vec[0].bv_len <= PAGE_SIZE) {
+ *nr_segs = 1;
+ break;
+ }
split = blk_bio_segment_split(q, *bio, &q->bio_split, nr_segs);
break;
}
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index a0d3ce30fa08..062229395a50 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -74,10 +74,8 @@ static ssize_t blk_mq_sysfs_show(struct kobject *kobj, struct attribute *attr,
if (!entry->show)
return -EIO;
- res = -ENOENT;
mutex_lock(&q->sysfs_lock);
- if (!blk_queue_dying(q))
- res = entry->show(ctx, page);
+ res = entry->show(ctx, page);
mutex_unlock(&q->sysfs_lock);
return res;
}
@@ -97,10 +95,8 @@ static ssize_t blk_mq_sysfs_store(struct kobject *kobj, struct attribute *attr,
if (!entry->store)
return -EIO;
- res = -ENOENT;
mutex_lock(&q->sysfs_lock);
- if (!blk_queue_dying(q))
- res = entry->store(ctx, page, length);
+ res = entry->store(ctx, page, length);
mutex_unlock(&q->sysfs_lock);
return res;
}
@@ -120,10 +116,8 @@ static ssize_t blk_mq_hw_sysfs_show(struct kobject *kobj,
if (!entry->show)
return -EIO;
- res = -ENOENT;
mutex_lock(&q->sysfs_lock);
- if (!blk_queue_dying(q))
- res = entry->show(hctx, page);
+ res = entry->show(hctx, page);
mutex_unlock(&q->sysfs_lock);
return res;
}
@@ -144,10 +138,8 @@ static ssize_t blk_mq_hw_sysfs_store(struct kobject *kobj,
if (!entry->store)
return -EIO;
- res = -ENOENT;
mutex_lock(&q->sysfs_lock);
- if (!blk_queue_dying(q))
- res = entry->store(hctx, page, length);
+ res = entry->store(hctx, page, length);
mutex_unlock(&q->sysfs_lock);
return res;
}
@@ -166,20 +158,25 @@ static ssize_t blk_mq_hw_sysfs_nr_reserved_tags_show(struct blk_mq_hw_ctx *hctx,
static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page)
{
+ const size_t size = PAGE_SIZE - 1;
unsigned int i, first = 1;
- ssize_t ret = 0;
+ int ret = 0, pos = 0;
for_each_cpu(i, hctx->cpumask) {
if (first)
- ret += sprintf(ret + page, "%u", i);
+ ret = snprintf(pos + page, size - pos, "%u", i);
else
- ret += sprintf(ret + page, ", %u", i);
+ ret = snprintf(pos + page, size - pos, ", %u", i);
+
+ if (ret >= size - pos)
+ break;
first = 0;
+ pos += ret;
}
- ret += sprintf(ret + page, "\n");
- return ret;
+ ret = snprintf(pos + page, size + 1 - pos, "\n");
+ return pos + ret;
}
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_tags = {
diff --git a/block/blk-mq.c b/block/blk-mq.c
index ec791156e9cc..5c9adcaa27ac 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -93,7 +93,7 @@ static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx,
struct mq_inflight {
struct hd_struct *part;
- unsigned int *inflight;
+ unsigned int inflight[2];
};
static bool blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx,
@@ -102,45 +102,29 @@ static bool blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx,
{
struct mq_inflight *mi = priv;
- /*
- * index[0] counts the specific partition that was asked for.
- */
if (rq->part == mi->part)
- mi->inflight[0]++;
+ mi->inflight[rq_data_dir(rq)]++;
return true;
}
unsigned int blk_mq_in_flight(struct request_queue *q, struct hd_struct *part)
{
- unsigned inflight[2];
- struct mq_inflight mi = { .part = part, .inflight = inflight, };
+ struct mq_inflight mi = { .part = part };
- inflight[0] = inflight[1] = 0;
blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi);
- return inflight[0];
-}
-
-static bool blk_mq_check_inflight_rw(struct blk_mq_hw_ctx *hctx,
- struct request *rq, void *priv,
- bool reserved)
-{
- struct mq_inflight *mi = priv;
-
- if (rq->part == mi->part)
- mi->inflight[rq_data_dir(rq)]++;
-
- return true;
+ return mi.inflight[0] + mi.inflight[1];
}
void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part,
unsigned int inflight[2])
{
- struct mq_inflight mi = { .part = part, .inflight = inflight, };
+ struct mq_inflight mi = { .part = part };
- inflight[0] = inflight[1] = 0;
- blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight_rw, &mi);
+ blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi);
+ inflight[0] = mi.inflight[0];
+ inflight[1] = mi.inflight[1];
}
void blk_freeze_queue_start(struct request_queue *q)
@@ -663,18 +647,6 @@ bool blk_mq_complete_request(struct request *rq)
}
EXPORT_SYMBOL(blk_mq_complete_request);
-int blk_mq_request_started(struct request *rq)
-{
- return blk_mq_rq_state(rq) != MQ_RQ_IDLE;
-}
-EXPORT_SYMBOL_GPL(blk_mq_request_started);
-
-int blk_mq_request_completed(struct request *rq)
-{
- return blk_mq_rq_state(rq) == MQ_RQ_COMPLETE;
-}
-EXPORT_SYMBOL_GPL(blk_mq_request_completed);
-
void blk_mq_start_request(struct request *rq)
{
struct request_queue *q = rq->q;
@@ -1064,7 +1036,7 @@ bool blk_mq_get_driver_tag(struct request *rq)
bool shared;
if (rq->tag != -1)
- goto done;
+ return true;
if (blk_mq_tag_is_reserved(data.hctx->sched_tags, rq->internal_tag))
data.flags |= BLK_MQ_REQ_RESERVED;
@@ -1079,7 +1051,6 @@ bool blk_mq_get_driver_tag(struct request *rq)
data.hctx->tags->rqs[rq->tag] = rq;
}
-done:
return rq->tag != -1;
}
@@ -1486,7 +1457,7 @@ void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
}
EXPORT_SYMBOL(blk_mq_delay_run_hw_queue);
-bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
+void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
{
int srcu_idx;
bool need_run;
@@ -1504,12 +1475,8 @@ bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
blk_mq_hctx_has_pending(hctx);
hctx_unlock(hctx, srcu_idx);
- if (need_run) {
+ if (need_run)
__blk_mq_delay_run_hw_queue(hctx, async, 0);
- return true;
- }
-
- return false;
}
EXPORT_SYMBOL(blk_mq_run_hw_queue);
@@ -2789,6 +2756,23 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
int i, j, end;
struct blk_mq_hw_ctx **hctxs = q->queue_hw_ctx;
+ if (q->nr_hw_queues < set->nr_hw_queues) {
+ struct blk_mq_hw_ctx **new_hctxs;
+
+ new_hctxs = kcalloc_node(set->nr_hw_queues,
+ sizeof(*new_hctxs), GFP_KERNEL,
+ set->numa_node);
+ if (!new_hctxs)
+ return;
+ if (hctxs)
+ memcpy(new_hctxs, hctxs, q->nr_hw_queues *
+ sizeof(*hctxs));
+ q->queue_hw_ctx = new_hctxs;
+ q->nr_hw_queues = set->nr_hw_queues;
+ kfree(hctxs);
+ hctxs = new_hctxs;
+ }
+
/* protect against switching io scheduler */
mutex_lock(&q->sysfs_lock);
for (i = 0; i < set->nr_hw_queues; i++) {
@@ -2844,19 +2828,6 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
mutex_unlock(&q->sysfs_lock);
}
-/*
- * Maximum number of hardware queues we support. For single sets, we'll never
- * have more than the CPUs (software queues). For multiple sets, the tag_set
- * user may have set ->nr_hw_queues larger.
- */
-static unsigned int nr_hw_queues(struct blk_mq_tag_set *set)
-{
- if (set->nr_maps == 1)
- return nr_cpu_ids;
-
- return max(set->nr_hw_queues, nr_cpu_ids);
-}
-
struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
struct request_queue *q,
bool elevator_init)
@@ -2876,12 +2847,6 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
/* init q->mq_kobj and sw queues' kobjects */
blk_mq_sysfs_init(q);
- q->nr_queues = nr_hw_queues(set);
- q->queue_hw_ctx = kcalloc_node(q->nr_queues, sizeof(*(q->queue_hw_ctx)),
- GFP_KERNEL, set->numa_node);
- if (!q->queue_hw_ctx)
- goto err_sys_init;
-
INIT_LIST_HEAD(&q->unused_hctx_list);
spin_lock_init(&q->unused_hctx_lock);
@@ -2929,7 +2894,6 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
err_hctxs:
kfree(q->queue_hw_ctx);
q->nr_hw_queues = 0;
-err_sys_init:
blk_mq_sysfs_deinit(q);
err_poll:
blk_stat_free_callback(q->poll_cb);
@@ -3030,6 +2994,29 @@ static int blk_mq_update_queue_map(struct blk_mq_tag_set *set)
}
}
+static int blk_mq_realloc_tag_set_tags(struct blk_mq_tag_set *set,
+ int cur_nr_hw_queues, int new_nr_hw_queues)
+{
+ struct blk_mq_tags **new_tags;
+
+ if (cur_nr_hw_queues >= new_nr_hw_queues)
+ return 0;
+
+ new_tags = kcalloc_node(new_nr_hw_queues, sizeof(struct blk_mq_tags *),
+ GFP_KERNEL, set->numa_node);
+ if (!new_tags)
+ return -ENOMEM;
+
+ if (set->tags)
+ memcpy(new_tags, set->tags, cur_nr_hw_queues *
+ sizeof(*set->tags));
+ kfree(set->tags);
+ set->tags = new_tags;
+ set->nr_hw_queues = new_nr_hw_queues;
+
+ return 0;
+}
+
/*
* Alloc a tag set to be associated with one or more request queues.
* May fail with EINVAL for various error conditions. May adjust the
@@ -3083,9 +3070,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
if (set->nr_maps == 1 && set->nr_hw_queues > nr_cpu_ids)
set->nr_hw_queues = nr_cpu_ids;
- set->tags = kcalloc_node(nr_hw_queues(set), sizeof(struct blk_mq_tags *),
- GFP_KERNEL, set->numa_node);
- if (!set->tags)
+ if (blk_mq_realloc_tag_set_tags(set, 0, set->nr_hw_queues) < 0)
return -ENOMEM;
ret = -ENOMEM;
@@ -3126,7 +3111,7 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
{
int i, j;
- for (i = 0; i < nr_hw_queues(set); i++)
+ for (i = 0; i < set->nr_hw_queues; i++)
blk_mq_free_map_and_requests(set, i);
for (j = 0; j < set->nr_maps; j++) {
@@ -3271,10 +3256,6 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
list_for_each_entry(q, &set->tag_list, tag_set_list)
blk_mq_freeze_queue(q);
/*
- * Sync with blk_mq_queue_tag_busy_iter.
- */
- synchronize_rcu();
- /*
* Switch IO scheduler to 'none', cleaning up the data associated
* with the previous scheduler. We will switch back once we are done
* updating the new sw to hw queue mappings.
@@ -3288,6 +3269,10 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
blk_mq_sysfs_unregister(q);
}
+ if (blk_mq_realloc_tag_set_tags(set, set->nr_hw_queues, nr_hw_queues) <
+ 0)
+ goto reregister;
+
prev_nr_hw_queues = set->nr_hw_queues;
set->nr_hw_queues = nr_hw_queues;
blk_mq_update_queue_map(set);
@@ -3304,6 +3289,7 @@ fallback:
blk_mq_map_swqueue(q);
}
+reregister:
list_for_each_entry(q, &set->tag_list, tag_set_list) {
blk_mq_sysfs_register(q);
blk_mq_debugfs_register_hctxs(q);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 32c62c64e6c2..eaaca8fc1c28 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -128,15 +128,6 @@ extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx);
void blk_mq_release(struct request_queue *q);
-/**
- * blk_mq_rq_state() - read the current MQ_RQ_* state of a request
- * @rq: target request.
- */
-static inline enum mq_rq_state blk_mq_rq_state(struct request *rq)
-{
- return READ_ONCE(rq->state);
-}
-
static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,
unsigned int cpu)
{
diff --git a/block/blk-stat.c b/block/blk-stat.c
index 940f15d600f8..7da302ff88d0 100644
--- a/block/blk-stat.c
+++ b/block/blk-stat.c
@@ -53,7 +53,7 @@ void blk_stat_add(struct request *rq, u64 now)
struct request_queue *q = rq->q;
struct blk_stat_callback *cb;
struct blk_rq_stat *stat;
- int bucket;
+ int bucket, cpu;
u64 value;
value = (now >= rq->io_start_time_ns) ? now - rq->io_start_time_ns : 0;
@@ -61,6 +61,7 @@ void blk_stat_add(struct request *rq, u64 now)
blk_throtl_stat_add(rq, value);
rcu_read_lock();
+ cpu = get_cpu();
list_for_each_entry_rcu(cb, &q->stats->callbacks, list) {
if (!blk_stat_is_active(cb))
continue;
@@ -69,10 +70,10 @@ void blk_stat_add(struct request *rq, u64 now)
if (bucket < 0)
continue;
- stat = &get_cpu_ptr(cb->cpu_stat)[bucket];
+ stat = &per_cpu_ptr(cb->cpu_stat, cpu)[bucket];
blk_rq_stat_add(stat, value);
- put_cpu_ptr(cb->cpu_stat);
}
+ put_cpu();
rcu_read_unlock();
}
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 46f5198be017..fca9b158f4a0 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -801,10 +801,6 @@ queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
if (!entry->show)
return -EIO;
mutex_lock(&q->sysfs_lock);
- if (blk_queue_dying(q)) {
- mutex_unlock(&q->sysfs_lock);
- return -ENOENT;
- }
res = entry->show(q, page);
mutex_unlock(&q->sysfs_lock);
return res;
@@ -823,10 +819,6 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
q = container_of(kobj, struct request_queue, kobj);
mutex_lock(&q->sysfs_lock);
- if (blk_queue_dying(q)) {
- mutex_unlock(&q->sysfs_lock);
- return -ENOENT;
- }
res = entry->store(q, page, length);
mutex_unlock(&q->sysfs_lock);
return res;
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 4bc5f260248a..481eaf7d04d4 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -202,32 +202,14 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector,
}
EXPORT_SYMBOL_GPL(blkdev_report_zones);
-/*
- * Special case of zone reset operation to reset all zones in one command,
- * useful for applications like mkfs.
- */
-static int __blkdev_reset_all_zones(struct block_device *bdev, gfp_t gfp_mask)
-{
- struct bio *bio = bio_alloc(gfp_mask, 0);
- int ret;
-
- /* across the zones operations, don't need any sectors */
- bio_set_dev(bio, bdev);
- bio_set_op_attrs(bio, REQ_OP_ZONE_RESET_ALL, 0);
-
- ret = submit_bio_wait(bio);
- bio_put(bio);
-
- return ret;
-}
-
static inline bool blkdev_allow_reset_all_zones(struct block_device *bdev,
+ sector_t sector,
sector_t nr_sectors)
{
if (!blk_queue_zone_resetall(bdev_get_queue(bdev)))
return false;
- if (nr_sectors != part_nr_sects_read(bdev->bd_part))
+ if (sector || nr_sectors != part_nr_sects_read(bdev->bd_part))
return false;
/*
* REQ_OP_ZONE_RESET_ALL can be executed only if the block device is
@@ -239,26 +221,29 @@ static inline bool blkdev_allow_reset_all_zones(struct block_device *bdev,
}
/**
- * blkdev_reset_zones - Reset zones write pointer
+ * blkdev_zone_mgmt - Execute a zone management operation on a range of zones
* @bdev: Target block device
- * @sector: Start sector of the first zone to reset
- * @nr_sectors: Number of sectors, at least the length of one zone
+ * @op: Operation to be performed on the zones
+ * @sector: Start sector of the first zone to operate on
+ * @nr_sectors: Number of sectors, should be at least the length of one zone and
+ * must be zone size aligned.
* @gfp_mask: Memory allocation flags (for bio_alloc)
*
* Description:
- * Reset the write pointer of the zones contained in the range
+ * Perform the specified operation on the range of zones specified by
* @sector..@sector+@nr_sectors. Specifying the entire disk sector range
* is valid, but the specified range should not contain conventional zones.
+ * The operation to execute on each zone can be a zone reset, open, close
+ * or finish request.
*/
-int blkdev_reset_zones(struct block_device *bdev,
- sector_t sector, sector_t nr_sectors,
- gfp_t gfp_mask)
+int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
+ sector_t sector, sector_t nr_sectors,
+ gfp_t gfp_mask)
{
struct request_queue *q = bdev_get_queue(bdev);
- sector_t zone_sectors;
+ sector_t zone_sectors = blk_queue_zone_sectors(q);
sector_t end_sector = sector + nr_sectors;
struct bio *bio = NULL;
- struct blk_plug plug;
int ret;
if (!blk_queue_is_zoned(q))
@@ -267,15 +252,14 @@ int blkdev_reset_zones(struct block_device *bdev,
if (bdev_read_only(bdev))
return -EPERM;
+ if (!op_is_zone_mgmt(op))
+ return -EOPNOTSUPP;
+
if (!nr_sectors || end_sector > bdev->bd_part->nr_sects)
/* Out of range */
return -EINVAL;
- if (blkdev_allow_reset_all_zones(bdev, nr_sectors))
- return __blkdev_reset_all_zones(bdev, gfp_mask);
-
/* Check alignment (handle eventual smaller last zone) */
- zone_sectors = blk_queue_zone_sectors(q);
if (sector & (zone_sectors - 1))
return -EINVAL;
@@ -283,29 +267,34 @@ int blkdev_reset_zones(struct block_device *bdev,
end_sector != bdev->bd_part->nr_sects)
return -EINVAL;
- blk_start_plug(&plug);
while (sector < end_sector) {
-
bio = blk_next_bio(bio, 0, gfp_mask);
- bio->bi_iter.bi_sector = sector;
bio_set_dev(bio, bdev);
- bio_set_op_attrs(bio, REQ_OP_ZONE_RESET, 0);
+ /*
+ * Special case for the zone reset operation that resets all
+ * zones; this is useful for applications like mkfs.
+ */
+ if (op == REQ_OP_ZONE_RESET &&
+ blkdev_allow_reset_all_zones(bdev, sector, nr_sectors)) {
+ bio->bi_opf = REQ_OP_ZONE_RESET_ALL;
+ break;
+ }
+
+ bio->bi_opf = op;
+ bio->bi_iter.bi_sector = sector;
sector += zone_sectors;
/* This may take a while, so be nice to others */
cond_resched();
-
}
ret = submit_bio_wait(bio);
bio_put(bio);
- blk_finish_plug(&plug);
-
return ret;
}
-EXPORT_SYMBOL_GPL(blkdev_reset_zones);
+EXPORT_SYMBOL_GPL(blkdev_zone_mgmt);
/*
* BLKREPORTZONE ioctl processing.
@@ -368,15 +357,16 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
}
/*
- * BLKRESETZONE ioctl processing.
+ * BLKRESETZONE, BLKOPENZONE, BLKCLOSEZONE and BLKFINISHZONE ioctl processing.
* Called from blkdev_ioctl.
*/
-int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
- unsigned int cmd, unsigned long arg)
+int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long arg)
{
void __user *argp = (void __user *)arg;
struct request_queue *q;
struct blk_zone_range zrange;
+ enum req_opf op;
if (!argp)
return -EINVAL;
@@ -397,8 +387,25 @@ int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range)))
return -EFAULT;
- return blkdev_reset_zones(bdev, zrange.sector, zrange.nr_sectors,
- GFP_KERNEL);
+ switch (cmd) {
+ case BLKRESETZONE:
+ op = REQ_OP_ZONE_RESET;
+ break;
+ case BLKOPENZONE:
+ op = REQ_OP_ZONE_OPEN;
+ break;
+ case BLKCLOSEZONE:
+ op = REQ_OP_ZONE_CLOSE;
+ break;
+ case BLKFINISHZONE:
+ op = REQ_OP_ZONE_FINISH;
+ break;
+ default:
+ return -ENOTTY;
+ }
+
+ return blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors,
+ GFP_KERNEL);
}
static inline unsigned long *blk_alloc_zone_bitmap(int node,
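The kernel-doc above covers the in-kernel entry point; the dm-zoned and f2fs hunks further down show existing reset callers being converted, and the new operations follow the same shape. A hedged sketch, assuming the caller already knows the zone's start sector and length (my_finish_zone is a hypothetical name):

	#include <linux/blkdev.h>

	/* Transition one zone to full with the new unified interface;
	 * bdev, sector and zone_sectors come from the caller's context. */
	static int my_finish_zone(struct block_device *bdev, sector_t sector,
				  sector_t zone_sectors)
	{
		return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_FINISH, sector,
					zone_sectors, GFP_KERNEL);
	}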
diff --git a/block/blk.h b/block/blk.h
index 47fba9362e60..2bea40180b6f 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -242,14 +242,11 @@ int blk_dev_init(void);
* Contribute to IO statistics IFF:
*
* a) it's attached to a gendisk, and
- * b) the queue had IO stats enabled when this request was started, and
- * c) it's a file system request
+ * b) the queue had IO stats enabled when this request was started
*/
static inline bool blk_do_io_stat(struct request *rq)
{
- return rq->rq_disk &&
- (rq->rq_flags & RQF_IO_STAT) &&
- !blk_rq_is_passthrough(rq);
+ return rq->rq_disk && (rq->rq_flags & RQF_IO_STAT);
}
static inline void req_set_nomerge(struct request_queue *q, struct request *req)
diff --git a/block/elevator.c b/block/elevator.c
index 5437059c9261..0b1db9afb586 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -831,3 +831,12 @@ struct request *elv_rb_latter_request(struct request_queue *q,
return NULL;
}
EXPORT_SYMBOL(elv_rb_latter_request);
+
+static int __init elevator_setup(char *str)
+{
+ pr_warn("Kernel parameter elevator= does not have any effect anymore.\n"
+ "Please use sysfs to set IO scheduler for individual devices.\n");
+ return 1;
+}
+
+__setup("elevator=", elevator_setup);
diff --git a/block/ioctl.c b/block/ioctl.c
index 15a0eb80ada9..8756efb1419e 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -532,7 +532,10 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
case BLKREPORTZONE:
return blkdev_report_zones_ioctl(bdev, mode, cmd, arg);
case BLKRESETZONE:
- return blkdev_reset_zones_ioctl(bdev, mode, cmd, arg);
+ case BLKOPENZONE:
+ case BLKCLOSEZONE:
+ case BLKFINISHZONE:
+ return blkdev_zone_mgmt_ioctl(bdev, mode, cmd, arg);
case BLKGETZONESZ:
return put_uint(arg, bdev_zone_sectors(bdev));
case BLKGETNRZONES:
diff --git a/block/opal_proto.h b/block/opal_proto.h
index 5532412d567c..736e67c3e7c5 100644
--- a/block/opal_proto.h
+++ b/block/opal_proto.h
@@ -76,7 +76,6 @@ enum opal_response_token {
* Derived from: TCG_Storage_Architecture_Core_Spec_v2.01_r1.00
* Section: 6.3 Assigned UIDs
*/
-#define OPAL_UID_LENGTH 8
#define OPAL_METHOD_LENGTH 8
#define OPAL_MSID_KEYLEN 15
#define OPAL_UID_LENGTH_HALF 4
@@ -108,6 +107,7 @@ enum opal_uid {
OPAL_C_PIN_TABLE,
OPAL_LOCKING_INFO_TABLE,
OPAL_ENTERPRISE_LOCKING_INFO_TABLE,
+ OPAL_DATASTORE,
/* C_PIN_TABLE object ID's */
OPAL_C_PIN_MSID,
OPAL_C_PIN_SID,
diff --git a/block/sed-opal.c b/block/sed-opal.c
index b4c761973ac1..b2cacc9ddd11 100644
--- a/block/sed-opal.c
+++ b/block/sed-opal.c
@@ -149,6 +149,8 @@ static const u8 opaluid[][OPAL_UID_LENGTH] = {
{ 0x00, 0x00, 0x08, 0x01, 0x00, 0x00, 0x00, 0x01 },
[OPAL_ENTERPRISE_LOCKING_INFO_TABLE] =
{ 0x00, 0x00, 0x08, 0x01, 0x00, 0x00, 0x00, 0x00 },
+ [OPAL_DATASTORE] =
+ { 0x00, 0x00, 0x10, 0x01, 0x00, 0x00, 0x00, 0x00 },
/* C_PIN_TABLE object ID's */
[OPAL_C_PIN_MSID] =
@@ -1139,11 +1141,11 @@ static int generic_get_column(struct opal_dev *dev, const u8 *table,
*
* the result is provided in dev->resp->tok[4]
*/
-static int generic_get_table_info(struct opal_dev *dev, enum opal_uid table,
+static int generic_get_table_info(struct opal_dev *dev, const u8 *table_uid,
u64 column)
{
u8 uid[OPAL_UID_LENGTH];
- const unsigned int half = OPAL_UID_LENGTH/2;
+ const unsigned int half = OPAL_UID_LENGTH_HALF;
/* sed-opal UIDs can be split in two halves:
* first: actual table index
@@ -1152,7 +1154,7 @@ static int generic_get_table_info(struct opal_dev *dev, enum opal_uid table,
* first part of the target table as relative index into that table
*/
memcpy(uid, opaluid[OPAL_TABLE_TABLE], half);
- memcpy(uid+half, opaluid[table], half);
+ memcpy(uid + half, table_uid, half);
return generic_get_column(dev, uid, column);
}
@@ -1221,6 +1223,75 @@ static int get_active_key(struct opal_dev *dev, void *data)
return get_active_key_cont(dev);
}
+static int generic_table_write_data(struct opal_dev *dev, const u64 data,
+ u64 offset, u64 size, const u8 *uid)
+{
+ const u8 __user *src = (u8 __user *)(uintptr_t)data;
+ u8 *dst;
+ u64 len;
+ size_t off = 0;
+ int err;
+
+ /* do we fit in the available space? */
+ err = generic_get_table_info(dev, uid, OPAL_TABLE_ROWS);
+ if (err) {
+ pr_debug("Couldn't get the table size\n");
+ return err;
+ }
+
+ len = response_get_u64(&dev->parsed, 4);
+ if (size > len || offset > len - size) {
+ pr_debug("Does not fit in the table (%llu vs. %llu)\n",
+ offset + size, len);
+ return -ENOSPC;
+ }
+
+ /* do the actual transmission(s) */
+ while (off < size) {
+ err = cmd_start(dev, uid, opalmethod[OPAL_SET]);
+ add_token_u8(&err, dev, OPAL_STARTNAME);
+ add_token_u8(&err, dev, OPAL_WHERE);
+ add_token_u64(&err, dev, offset + off);
+ add_token_u8(&err, dev, OPAL_ENDNAME);
+
+ add_token_u8(&err, dev, OPAL_STARTNAME);
+ add_token_u8(&err, dev, OPAL_VALUES);
+
+ /*
+ * The bytestring header is either 1 or 2 bytes, so assume 2.
+ * There also needs to be enough space to accommodate the
+ * trailing OPAL_ENDNAME (1 byte) and tokens added by
+ * cmd_finalize.
+ */
+ len = min(remaining_size(dev) - (2+1+CMD_FINALIZE_BYTES_NEEDED),
+ (size_t)(size - off));
+ pr_debug("Write bytes %zu+%llu/%llu\n", off, len, size);
+
+ dst = add_bytestring_header(&err, dev, len);
+ if (!dst)
+ break;
+
+ if (copy_from_user(dst, src + off, len)) {
+ err = -EFAULT;
+ break;
+ }
+
+ dev->pos += len;
+
+ add_token_u8(&err, dev, OPAL_ENDNAME);
+ if (err)
+ break;
+
+ err = finalize_and_send(dev, parse_and_check_status);
+ if (err)
+ break;
+
+ off += len;
+ }
+
+ return err;
+}
+
static int generic_lr_enable_disable(struct opal_dev *dev,
u8 *uid, bool rle, bool wle,
bool rl, bool wl)
@@ -1583,68 +1654,9 @@ static int set_mbr_enable_disable(struct opal_dev *dev, void *data)
static int write_shadow_mbr(struct opal_dev *dev, void *data)
{
struct opal_shadow_mbr *shadow = data;
- const u8 __user *src;
- u8 *dst;
- size_t off = 0;
- u64 len;
- int err = 0;
-
- /* do we fit in the available shadow mbr space? */
- err = generic_get_table_info(dev, OPAL_MBR, OPAL_TABLE_ROWS);
- if (err) {
- pr_debug("MBR: could not get shadow size\n");
- return err;
- }
-
- len = response_get_u64(&dev->parsed, 4);
- if (shadow->size > len || shadow->offset > len - shadow->size) {
- pr_debug("MBR: does not fit in shadow (%llu vs. %llu)\n",
- shadow->offset + shadow->size, len);
- return -ENOSPC;
- }
-
- /* do the actual transmission(s) */
- src = (u8 __user *)(uintptr_t)shadow->data;
- while (off < shadow->size) {
- err = cmd_start(dev, opaluid[OPAL_MBR], opalmethod[OPAL_SET]);
- add_token_u8(&err, dev, OPAL_STARTNAME);
- add_token_u8(&err, dev, OPAL_WHERE);
- add_token_u64(&err, dev, shadow->offset + off);
- add_token_u8(&err, dev, OPAL_ENDNAME);
-
- add_token_u8(&err, dev, OPAL_STARTNAME);
- add_token_u8(&err, dev, OPAL_VALUES);
-
- /*
- * The bytestring header is either 1 or 2 bytes, so assume 2.
- * There also needs to be enough space to accommodate the
- * trailing OPAL_ENDNAME (1 byte) and tokens added by
- * cmd_finalize.
- */
- len = min(remaining_size(dev) - (2+1+CMD_FINALIZE_BYTES_NEEDED),
- (size_t)(shadow->size - off));
- pr_debug("MBR: write bytes %zu+%llu/%llu\n",
- off, len, shadow->size);
-
- dst = add_bytestring_header(&err, dev, len);
- if (!dst)
- break;
- if (copy_from_user(dst, src + off, len))
- err = -EFAULT;
- dev->pos += len;
-
- add_token_u8(&err, dev, OPAL_ENDNAME);
- if (err)
- break;
-
- err = finalize_and_send(dev, parse_and_check_status);
- if (err)
- break;
-
- off += len;
- }
- return err;
+ return generic_table_write_data(dev, shadow->data, shadow->offset,
+ shadow->size, opaluid[OPAL_MBR]);
}
static int generic_pw_cmd(u8 *key, size_t key_len, u8 *cpin_uid,
@@ -1957,6 +1969,113 @@ static int get_msid_cpin_pin(struct opal_dev *dev, void *data)
return 0;
}
+static int write_table_data(struct opal_dev *dev, void *data)
+{
+ struct opal_read_write_table *write_tbl = data;
+
+ return generic_table_write_data(dev, write_tbl->data, write_tbl->offset,
+ write_tbl->size, write_tbl->table_uid);
+}
+
+static int read_table_data_cont(struct opal_dev *dev)
+{
+ int err;
+ const char *data_read;
+
+ err = parse_and_check_status(dev);
+ if (err)
+ return err;
+
+ dev->prev_d_len = response_get_string(&dev->parsed, 1, &data_read);
+ dev->prev_data = (void *)data_read;
+ if (!dev->prev_data) {
+ pr_debug("%s: Couldn't read data from the table.\n", __func__);
+ return OPAL_INVAL_PARAM;
+ }
+
+ return 0;
+}
+
+/*
+ * IO_BUFFER_LENGTH = 2048
+ * sizeof(header) = 56
+ * No. of Token Bytes in the Response = 11
+ * MAX size of data that can be carried in response buffer
+ * at a time is: 2048 - (56 + 11) = 1981 = 0x7BD.
+ */
+#define OPAL_MAX_READ_TABLE (0x7BD)
+
+static int read_table_data(struct opal_dev *dev, void *data)
+{
+ struct opal_read_write_table *read_tbl = data;
+ int err;
+ size_t off = 0, max_read_size = OPAL_MAX_READ_TABLE;
+ u64 table_len, len;
+ u64 offset = read_tbl->offset, read_size = read_tbl->size - 1;
+ u8 __user *dst;
+
+ err = generic_get_table_info(dev, read_tbl->table_uid, OPAL_TABLE_ROWS);
+ if (err) {
+ pr_debug("Couldn't get the table size\n");
+ return err;
+ }
+
+ table_len = response_get_u64(&dev->parsed, 4);
+
+ /* Check if the user is trying to read beyond the table limits */
+ if (read_size > table_len || offset > table_len - read_size) {
+ pr_debug("Read size exceeds the Table size limits (%llu vs. %llu)\n",
+ offset + read_size, table_len);
+ return -EINVAL;
+ }
+
+ while (off < read_size) {
+ err = cmd_start(dev, read_tbl->table_uid, opalmethod[OPAL_GET]);
+
+ add_token_u8(&err, dev, OPAL_STARTLIST);
+ add_token_u8(&err, dev, OPAL_STARTNAME);
+ add_token_u8(&err, dev, OPAL_STARTROW);
+ add_token_u64(&err, dev, offset + off); /* start row value */
+ add_token_u8(&err, dev, OPAL_ENDNAME);
+
+ add_token_u8(&err, dev, OPAL_STARTNAME);
+ add_token_u8(&err, dev, OPAL_ENDROW);
+
+ len = min(max_read_size, (size_t)(read_size - off));
+ add_token_u64(&err, dev, offset + off + len); /* end row value */
+ add_token_u8(&err, dev, OPAL_ENDNAME);
+ add_token_u8(&err, dev, OPAL_ENDLIST);
+
+ if (err) {
+ pr_debug("Error building read table data command.\n");
+ break;
+ }
+
+ err = finalize_and_send(dev, read_table_data_cont);
+ if (err)
+ break;
+
+ /* len+1: This includes the NULL terminator at the end */
+ if (dev->prev_d_len > len + 1) {
+ err = -EOVERFLOW;
+ break;
+ }
+
+ dst = (u8 __user *)(uintptr_t)read_tbl->data;
+ if (copy_to_user(dst + off, dev->prev_data, dev->prev_d_len)) {
+ pr_debug("Error copying data to userspace\n");
+ err = -EFAULT;
+ break;
+ }
+ dev->prev_data = NULL;
+
+ off += len;
+ }
+
+ return err;
+}
+
static int end_opal_session(struct opal_dev *dev, void *data)
{
int err = 0;
@@ -2443,6 +2562,68 @@ bool opal_unlock_from_suspend(struct opal_dev *dev)
}
EXPORT_SYMBOL(opal_unlock_from_suspend);
+static int opal_read_table(struct opal_dev *dev,
+ struct opal_read_write_table *rw_tbl)
+{
+ const struct opal_step read_table_steps[] = {
+ { start_admin1LSP_opal_session, &rw_tbl->key },
+ { read_table_data, rw_tbl },
+ { end_opal_session, }
+ };
+ int ret = 0;
+
+ if (!rw_tbl->size)
+ return ret;
+
+ return execute_steps(dev, read_table_steps,
+ ARRAY_SIZE(read_table_steps));
+}
+
+static int opal_write_table(struct opal_dev *dev,
+ struct opal_read_write_table *rw_tbl)
+{
+ const struct opal_step write_table_steps[] = {
+ { start_admin1LSP_opal_session, &rw_tbl->key },
+ { write_table_data, rw_tbl },
+ { end_opal_session, }
+ };
+ int ret = 0;
+
+ if (!rw_tbl->size)
+ return ret;
+
+ return execute_steps(dev, write_table_steps,
+ ARRAY_SIZE(write_table_steps));
+}
+
+static int opal_generic_read_write_table(struct opal_dev *dev,
+ struct opal_read_write_table *rw_tbl)
+{
+ int ret, bit_set;
+
+ mutex_lock(&dev->dev_lock);
+ setup_opal_dev(dev);
+
+ bit_set = fls64(rw_tbl->flags) - 1;
+ switch (bit_set) {
+ case OPAL_READ_TABLE:
+ ret = opal_read_table(dev, rw_tbl);
+ break;
+ case OPAL_WRITE_TABLE:
+ ret = opal_write_table(dev, rw_tbl);
+ break;
+ default:
+ pr_debug("Invalid bit set in the flag (%016llx).\n",
+ rw_tbl->flags);
+ ret = -EINVAL;
+ break;
+ }
+
+ mutex_unlock(&dev->dev_lock);
+
+ return ret;
+}
+
int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg)
{
void *p;
@@ -2505,6 +2686,9 @@ int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg)
case IOC_OPAL_PSID_REVERT_TPR:
ret = opal_reverttper(dev, p, true);
break;
+ case IOC_OPAL_GENERIC_TABLE_RW:
+ ret = opal_generic_read_write_table(dev, p);
+ break;
default:
break;
}
diff --git a/block/t10-pi.c b/block/t10-pi.c
index 9803c7e0376e..f4907d941f03 100644
--- a/block/t10-pi.c
+++ b/block/t10-pi.c
@@ -235,16 +235,12 @@ static blk_status_t t10_pi_type3_verify_ip(struct blk_integrity_iter *iter)
return t10_pi_verify(iter, t10_pi_ip_fn, T10_PI_TYPE3_PROTECTION);
}
-/**
- * Type 3 does not have a reference tag so no remapping is required.
- */
+/* Type 3 does not have a reference tag so no remapping is required. */
static void t10_pi_type3_prepare(struct request *rq)
{
}
-/**
- * Type 3 does not have a reference tag so no remapping is required.
- */
+/* Type 3 does not have a reference tag so no remapping is required. */
static void t10_pi_type3_complete(struct request *rq, unsigned int nr_bytes)
{
}
diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
index 595a73110e17..feb4718ce6a6 100644
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -1312,9 +1312,9 @@ static int dmz_reset_zone(struct dmz_metadata *zmd, struct dm_zone *zone)
if (!dmz_is_empty(zone) || dmz_seq_write_err(zone)) {
struct dmz_dev *dev = zmd->dev;
- ret = blkdev_reset_zones(dev->bdev,
- dmz_start_sect(zmd, zone),
- dev->zone_nr_sectors, GFP_NOIO);
+ ret = blkdev_zone_mgmt(dev->bdev, REQ_OP_ZONE_RESET,
+ dmz_start_sect(zmd, zone),
+ dev->zone_nr_sectors, GFP_NOIO);
if (ret) {
dmz_dev_err(dev, "Reset zone %u failed %d",
dmz_id(zmd, zone), ret);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 9c073dbdc1b0..d612468ee66b 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1403,11 +1403,7 @@ static void flush_disk(struct block_device *bdev, bool kill_dirty)
"resized disk %s\n",
bdev->bd_disk ? bdev->bd_disk->disk_name : "");
}
-
- if (!bdev->bd_disk)
- return;
- if (disk_part_scan_enabled(bdev->bd_disk))
- bdev->bd_invalidated = 1;
+ bdev->bd_invalidated = 1;
}
/**
@@ -1512,6 +1508,19 @@ EXPORT_SYMBOL(bd_set_size);
static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
+static void bdev_disk_changed(struct block_device *bdev, bool invalidate)
+{
+ if (disk_part_scan_enabled(bdev->bd_disk)) {
+ if (invalidate)
+ invalidate_partitions(bdev->bd_disk, bdev);
+ else
+ rescan_partitions(bdev->bd_disk, bdev);
+ } else {
+ check_disk_size_change(bdev->bd_disk, bdev, !invalidate);
+ bdev->bd_invalidated = 0;
+ }
+}
+
/*
* bd_mutex locking:
*
@@ -1594,12 +1603,9 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
* The latter is necessary to prevent ghost
* partitions on a removed medium.
*/
- if (bdev->bd_invalidated) {
- if (!ret)
- rescan_partitions(disk, bdev);
- else if (ret == -ENOMEDIUM)
- invalidate_partitions(disk, bdev);
- }
+ if (bdev->bd_invalidated &&
+ (!ret || ret == -ENOMEDIUM))
+ bdev_disk_changed(bdev, ret == -ENOMEDIUM);
if (ret)
goto out_clear;
@@ -1632,12 +1638,9 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
if (bdev->bd_disk->fops->open)
ret = bdev->bd_disk->fops->open(bdev, mode);
/* the same as first opener case, read comment there */
- if (bdev->bd_invalidated) {
- if (!ret)
- rescan_partitions(bdev->bd_disk, bdev);
- else if (ret == -ENOMEDIUM)
- invalidate_partitions(bdev->bd_disk, bdev);
- }
+ if (bdev->bd_invalidated &&
+ (!ret || ret == -ENOMEDIUM))
+ bdev_disk_changed(bdev, ret == -ENOMEDIUM);
if (ret)
goto out_unlock_bdev;
}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 808709581481..2c997f94a3b2 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1771,7 +1771,8 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
return -EIO;
}
trace_f2fs_issue_reset_zone(bdev, blkstart);
- return blkdev_reset_zones(bdev, sector, nr_sects, GFP_NOFS);
+ return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
+ sector, nr_sects, GFP_NOFS);
}
/* For conventional zones, use regular discard if supported */
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 3d40771e8e7c..41b6438bd2d9 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -261,7 +261,7 @@ static int f_getowner_uids(struct file *filp, unsigned long arg)
static bool rw_hint_valid(enum rw_hint hint)
{
switch (hint) {
- case RWF_WRITE_LIFE_NOT_SET:
+ case RWH_WRITE_LIFE_NOT_SET:
case RWH_WRITE_LIFE_NONE:
case RWH_WRITE_LIFE_SHORT:
case RWH_WRITE_LIFE_MEDIUM:
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 0bf056de5cc3..dc03e059fdff 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -10,103 +10,239 @@ struct blk_mq_tags;
struct blk_flush_queue;
/**
- * struct blk_mq_hw_ctx - State for a hardware queue facing the hardware block device
+ * struct blk_mq_hw_ctx - State for a hardware queue facing the hardware
+ * block device
*/
struct blk_mq_hw_ctx {
struct {
+ /** @lock: Protects the dispatch list. */
spinlock_t lock;
+ /**
+ * @dispatch: Used for requests that are ready to be
+ * dispatched to the hardware but for some reason (e.g. lack of
+ * resources) could not be sent to the hardware. As soon as the
+ * driver can send new requests, requests on this list will
+ * be sent first for a fairer dispatch.
+ */
struct list_head dispatch;
- unsigned long state; /* BLK_MQ_S_* flags */
+ /**
+ * @state: BLK_MQ_S_* flags. Defines the state of the hw
+ * queue (active, scheduled to restart, stopped).
+ */
+ unsigned long state;
} ____cacheline_aligned_in_smp;
+ /**
+ * @run_work: Used for scheduling a hardware queue run at a later time.
+ */
struct delayed_work run_work;
+ /** @cpumask: Map of available CPUs where this hctx can run. */
cpumask_var_t cpumask;
+ /**
+ * @next_cpu: Used by blk_mq_hctx_next_cpu() for round-robin CPU
+ * selection from @cpumask.
+ */
int next_cpu;
+ /**
+ * @next_cpu_batch: Counter of how many queue runs are left in the batch before
+ * changing to the next CPU.
+ */
int next_cpu_batch;
- unsigned long flags; /* BLK_MQ_F_* flags */
+ /** @flags: BLK_MQ_F_* flags. Defines the behaviour of the queue. */
+ unsigned long flags;
+ /**
+ * @sched_data: Pointer owned by the IO scheduler attached to a request
+ * queue. It's up to the IO scheduler how to use this pointer.
+ */
void *sched_data;
+ /**
+ * @queue: Pointer to the request queue that owns this hardware context.
+ */
struct request_queue *queue;
+ /** @fq: Queue of requests that need to perform a flush operation. */
struct blk_flush_queue *fq;
+ /**
+ * @driver_data: Pointer to data owned by the block driver that created
+ * this hctx
+ */
void *driver_data;
+ /**
+ * @ctx_map: Bitmap for each software queue. If bit is on, there is a
+ * pending request in that software queue.
+ */
struct sbitmap ctx_map;
+ /**
+ * @dispatch_from: Software queue to be used when no scheduler was
+ * selected.
+ */
struct blk_mq_ctx *dispatch_from;
+ /**
+ * @dispatch_busy: Number used by blk_mq_update_dispatch_busy() to
+ * decide if the hw_queue is busy using Exponential Weighted Moving
+ * Average algorithm.
+ */
unsigned int dispatch_busy;
+ /** @type: HCTX_TYPE_* flags. Type of hardware queue. */
unsigned short type;
+ /** @nr_ctx: Number of software queues. */
unsigned short nr_ctx;
+ /** @ctxs: Array of software queues. */
struct blk_mq_ctx **ctxs;
+ /** @dispatch_wait_lock: Lock for dispatch_wait queue. */
spinlock_t dispatch_wait_lock;
+ /**
+ * @dispatch_wait: Waitqueue to put requests when there is no tag
+ * available at the moment, to wait for another try in the future.
+ */
wait_queue_entry_t dispatch_wait;
+
+ /**
+ * @wait_index: Index of next available dispatch_wait queue to insert
+ * requests.
+ */
atomic_t wait_index;
+ /**
+ * @tags: Tags owned by the block driver. A tag in this set is only
+ * assigned when a request is dispatched from a hardware queue.
+ */
struct blk_mq_tags *tags;
+ /**
+ * @sched_tags: Tags owned by I/O scheduler. If there is an I/O
+ * scheduler associated with a request queue, a tag is assigned when
+ * that request is allocated. Else, this member is not used.
+ */
struct blk_mq_tags *sched_tags;
+ /** @queued: Number of queued requests. */
unsigned long queued;
+ /** @run: Number of dispatched requests. */
unsigned long run;
#define BLK_MQ_MAX_DISPATCH_ORDER 7
+ /** @dispatched: Number of dispatch requests by queue. */
unsigned long dispatched[BLK_MQ_MAX_DISPATCH_ORDER];
+ /** @numa_node: NUMA node the storage adapter has been connected to. */
unsigned int numa_node;
+ /** @queue_num: Index of this hardware queue. */
unsigned int queue_num;
+ /**
+ * @nr_active: Number of active requests. Only used when a tag set is
+ * shared across request queues.
+ */
atomic_t nr_active;
+ /** @cpuhp_dead: List to store requests if a CPU dies. */
struct hlist_node cpuhp_dead;
+ /** @kobj: Kernel object for sysfs. */
struct kobject kobj;
+ /** @poll_considered: Count times blk_poll() was called. */
unsigned long poll_considered;
+ /** @poll_invoked: Count how many requests blk_poll() polled. */
unsigned long poll_invoked;
+ /** @poll_success: Count how many polled requests were completed. */
unsigned long poll_success;
#ifdef CONFIG_BLK_DEBUG_FS
+ /**
+ * @debugfs_dir: debugfs directory for this hardware queue. Named
+ * as hctx<queue_num>.
+ */
struct dentry *debugfs_dir;
+ /** @sched_debugfs_dir: debugfs directory for the scheduler. */
struct dentry *sched_debugfs_dir;
#endif
+ /** @hctx_list: List of all hardware queues. */
struct list_head hctx_list;
- /* Must be the last member - see also blk_mq_hw_ctx_size(). */
+ /**
+ * @srcu: Sleepable RCU. Use as lock when type of the hardware queue is
+ * blocking (BLK_MQ_F_BLOCKING). Must be the last member - see also
+ * blk_mq_hw_ctx_size().
+ */
struct srcu_struct srcu[0];
};
+/**
+ * struct blk_mq_queue_map - Map software queues to hardware queues
+ * @mq_map: CPU ID to hardware queue index map. This is an array
+ * with nr_cpu_ids elements. Each element has a value in the range
+ * [@queue_offset, @queue_offset + @nr_queues).
+ * @nr_queues: Number of hardware queues to map CPU IDs onto.
+ * @queue_offset: First hardware queue to map onto. Used by the PCIe NVMe
+ * driver to map each hardware queue type (enum hctx_type) onto a distinct
+ * set of hardware queues.
+ */
struct blk_mq_queue_map {
unsigned int *mq_map;
unsigned int nr_queues;
unsigned int queue_offset;
};
+/**
+ * enum hctx_type - Type of hardware queue
+ * @HCTX_TYPE_DEFAULT: All I/O not otherwise accounted for.
+ * @HCTX_TYPE_READ: Just for READ I/O.
+ * @HCTX_TYPE_POLL: Polled I/O of any kind.
+ * @HCTX_MAX_TYPES: Number of types of hctx.
+ */
enum hctx_type {
- HCTX_TYPE_DEFAULT, /* all I/O not otherwise accounted for */
- HCTX_TYPE_READ, /* just for READ I/O */
- HCTX_TYPE_POLL, /* polled I/O of any kind */
+ HCTX_TYPE_DEFAULT,
+ HCTX_TYPE_READ,
+ HCTX_TYPE_POLL,
HCTX_MAX_TYPES,
};
+/**
+ * struct blk_mq_tag_set - tag set that can be shared between request queues
+ * @map: One or more ctx -> hctx mappings. One map exists for each
+ * hardware queue type (enum hctx_type) that the driver wishes
+ * to support. There are no restrictions on maps being of the
+ * same size, and it's perfectly legal to share maps between
+ * types.
+ * @nr_maps: Number of elements in the @map array. A number in the range
+ * [1, HCTX_MAX_TYPES].
+ * @ops: Pointers to functions that implement block driver behavior.
+ * @nr_hw_queues: Number of hardware queues supported by the block driver that
+ * owns this data structure.
+ * @queue_depth: Number of tags per hardware queue, reserved tags included.
+ * @reserved_tags: Number of tags to set aside for BLK_MQ_REQ_RESERVED tag
+ * allocations.
+ * @cmd_size: Number of additional bytes to allocate per request. The block
+ * driver owns these additional bytes.
+ * @numa_node: NUMA node the storage adapter has been connected to.
+ * @timeout: Request processing timeout in jiffies.
+ * @flags: Zero or more BLK_MQ_F_* flags.
+ * @driver_data: Pointer to data owned by the block driver that created this
+ * tag set.
+ * @tags: Tag sets. One tag set per hardware queue. Has @nr_hw_queues
+ * elements.
+ * @tag_list_lock: Serializes tag_list accesses.
+ * @tag_list: List of the request queues that use this tag set. See also
+ * request_queue.tag_set_list.
+ */
struct blk_mq_tag_set {
- /*
- * map[] holds ctx -> hctx mappings, one map exists for each type
- * that the driver wishes to support. There are no restrictions
- * on maps being of the same size, and it's perfectly legal to
- * share maps between types.
- */
struct blk_mq_queue_map map[HCTX_MAX_TYPES];
- unsigned int nr_maps; /* nr entries in map[] */
+ unsigned int nr_maps;
const struct blk_mq_ops *ops;
- unsigned int nr_hw_queues; /* nr hw queues across maps */
- unsigned int queue_depth; /* max hw supported */
+ unsigned int nr_hw_queues;
+ unsigned int queue_depth;
unsigned int reserved_tags;
- unsigned int cmd_size; /* per-request extra data */
+ unsigned int cmd_size;
int numa_node;
unsigned int timeout;
- unsigned int flags; /* BLK_MQ_F_* */
+ unsigned int flags;
void *driver_data;
struct blk_mq_tags **tags;
@@ -115,6 +251,12 @@ struct blk_mq_tag_set {
struct list_head tag_list;
};
+/**
+ * struct blk_mq_queue_data - Data about a request inserted in a queue
+ *
+ * @rq: Request pointer.
+ * @last: If it is the last request in the queue.
+ */
struct blk_mq_queue_data {
struct request *rq;
bool last;
@@ -142,81 +284,101 @@ typedef bool (busy_fn)(struct request_queue *);
typedef void (complete_fn)(struct request *);
typedef void (cleanup_rq_fn)(struct request *);
-
+/**
+ * struct blk_mq_ops - Callback functions that implement block driver
+ * behaviour.
+ */
struct blk_mq_ops {
- /*
- * Queue request
+ /**
+ * @queue_rq: Queue a new request from block IO.
*/
queue_rq_fn *queue_rq;
- /*
- * If a driver uses bd->last to judge when to submit requests to
- * hardware, it must define this function. In case of errors that
- * make us stop issuing further requests, this hook serves the
+ /**
+ * @commit_rqs: If a driver uses bd->last to judge when to submit
+ * requests to hardware, it must define this function. In case of errors
+ * that make us stop issuing further requests, this hook serves the
* purpose of kicking the hardware (which the last request otherwise
* would have done).
*/
commit_rqs_fn *commit_rqs;
- /*
- * Reserve budget before queue request, once .queue_rq is
+ /**
+ * @get_budget: Reserve budget before queue request, once .queue_rq is
* run, it is driver's responsibility to release the
* reserved budget. Also we have to handle failure case
* of .get_budget for avoiding I/O deadlock.
*/
get_budget_fn *get_budget;
+ /**
+ * @put_budget: Release the reserved budget.
+ */
put_budget_fn *put_budget;
- /*
- * Called on request timeout
+ /**
+ * @timeout: Called on request timeout.
*/
timeout_fn *timeout;
- /*
- * Called to poll for completion of a specific tag.
+ /**
+ * @poll: Called to poll for completion of a specific tag.
*/
poll_fn *poll;
+ /**
+ * @complete: Mark the request as complete.
+ */
complete_fn *complete;
- /*
- * Called when the block layer side of a hardware queue has been
- * set up, allowing the driver to allocate/init matching structures.
- * Ditto for exit/teardown.
+ /**
+ * @init_hctx: Called when the block layer side of a hardware queue has
+ * been set up, allowing the driver to allocate/init matching
+ * structures.
*/
init_hctx_fn *init_hctx;
+ /**
+ * @exit_hctx: Ditto for exit/teardown.
+ */
exit_hctx_fn *exit_hctx;
- /*
- * Called for every command allocated by the block layer to allow
- * the driver to set up driver specific data.
+ /**
+ * @init_request: Called for every command allocated by the block layer
+ * to allow the driver to set up driver specific data.
*
* Tag greater than or equal to queue_depth is for setting up
* flush request.
- *
- * Ditto for exit/teardown.
*/
init_request_fn *init_request;
+ /**
+ * @exit_request: Ditto for exit/teardown.
+ */
exit_request_fn *exit_request;
- /* Called from inside blk_get_request() */
+
+ /**
+ * @initialize_rq_fn: Called from inside blk_get_request().
+ */
void (*initialize_rq_fn)(struct request *rq);
- /*
- * Called before freeing one request which isn't completed yet,
- * and usually for freeing the driver private data
+ /**
+ * @cleanup_rq: Called before freeing one request which isn't completed
+ * yet, and usually for freeing the driver private data.
*/
cleanup_rq_fn *cleanup_rq;
- /*
- * If set, returns whether or not this queue currently is busy
+ /**
+ * @busy: If set, returns whether or not this queue currently is busy.
*/
busy_fn *busy;
+ /**
+ * @map_queues: This allows drivers to specify their own queue mapping by
+ * overriding the setup-time function that builds the mq_map.
+ */
map_queues_fn *map_queues;
#ifdef CONFIG_BLK_DEBUG_FS
- /*
- * Used by the debugfs implementation to show driver-specific
+ /**
+ * @show_rq: Used by the debugfs implementation to show driver-specific
* information about a request.
*/
void (*show_rq)(struct seq_file *m, struct request *rq);
@@ -301,9 +463,25 @@ static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag)
return unique_tag & BLK_MQ_UNIQUE_TAG_MASK;
}
+/**
+ * blk_mq_rq_state() - read the current MQ_RQ_* state of a request
+ * @rq: target request.
+ */
+static inline enum mq_rq_state blk_mq_rq_state(struct request *rq)
+{
+ return READ_ONCE(rq->state);
+}
+
+static inline int blk_mq_request_started(struct request *rq)
+{
+ return blk_mq_rq_state(rq) != MQ_RQ_IDLE;
+}
+
+static inline int blk_mq_request_completed(struct request *rq)
+{
+ return blk_mq_rq_state(rq) == MQ_RQ_COMPLETE;
+}
-int blk_mq_request_started(struct request *rq);
-int blk_mq_request_completed(struct request *rq);
void blk_mq_start_request(struct request *rq);
void blk_mq_end_request(struct request *rq, blk_status_t error);
void __blk_mq_end_request(struct request *rq, blk_status_t error);
@@ -324,7 +502,7 @@ void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
void blk_mq_quiesce_queue(struct request_queue *q);
void blk_mq_unquiesce_queue(struct request_queue *q);
void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
-bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
+void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
void blk_mq_run_hw_queues(struct request_queue *q, bool async);
void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
busy_tag_iter_fn *fn, void *priv);
@@ -343,14 +521,29 @@ void blk_mq_quiesce_queue_nowait(struct request_queue *q);
unsigned int blk_mq_rq_cpu(struct request *rq);
-/*
+/**
+ * blk_mq_rq_from_pdu - cast a PDU to a request
+ * @pdu: the PDU (Protocol Data Unit) to be cast
+ *
+ * Return: request
+ *
* Driver command data is immediately after the request. So subtract request
- * size to get back to the original request, add request size to get the PDU.
+ * size to get back to the original request.
*/
static inline struct request *blk_mq_rq_from_pdu(void *pdu)
{
return pdu - sizeof(struct request);
}
+
+/**
+ * blk_mq_rq_to_pdu - cast a request to a PDU
+ * @rq: the request to be cast
+ *
+ * Return: pointer to the PDU
+ *
+ * Driver command data is immediately after the request. So add request to get
+ * the PDU.
+ */
static inline void *blk_mq_rq_to_pdu(struct request *rq)
{
return rq + 1;
}
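Read together with the @cmd_size documentation above, the PDU layout is: each request is allocated with cmd_size extra bytes, blk_mq_rq_to_pdu() steps forward over the request, and blk_mq_rq_from_pdu() steps back. A hedged sketch of the round trip; every "my_" identifier is hypothetical, not part of this patch:

	#include <linux/blk-mq.h>

	struct my_pdu {			/* sized into each request via @cmd_size */
		u32 hw_tag;
	};

	static blk_status_t my_queue_rq(struct blk_mq_hw_ctx *hctx,
					const struct blk_mq_queue_data *bd)
	{
		struct my_pdu *pdu = blk_mq_rq_to_pdu(bd->rq);	/* rq + 1 */

		blk_mq_start_request(bd->rq);
		pdu->hw_tag = bd->rq->tag;
		/* a real driver would issue the command to hardware here */
		blk_mq_end_request(bd->rq, BLK_STS_OK);
		return BLK_STS_OK;
	}

	static const struct blk_mq_ops my_mq_ops = {
		.queue_rq	= my_queue_rq,
	};

	static struct blk_mq_tag_set my_tag_set = {
		.ops		= &my_mq_ops,
		.nr_hw_queues	= 1,
		.nr_maps	= 1,			/* HCTX_TYPE_DEFAULT only */
		.queue_depth	= 64,
		.numa_node	= NUMA_NO_NODE,
		.cmd_size	= sizeof(struct my_pdu),
		.flags		= BLK_MQ_F_SHOULD_MERGE,
	};

After blk_mq_alloc_tag_set(&my_tag_set) succeeds, set->tags holds nr_hw_queues entries, which is exactly the array the new realloc helpers in the blk-mq.c hunks above now grow on demand.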
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index d688b96d1d63..23a2fd534817 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -153,10 +153,10 @@ struct bio {
unsigned short bi_write_hint;
blk_status_t bi_status;
u8 bi_partno;
+ atomic_t __bi_remaining;
struct bvec_iter bi_iter;
- atomic_t __bi_remaining;
bio_end_io_t *bi_end_io;
void *bi_private;
@@ -290,6 +290,12 @@ enum req_opf {
REQ_OP_ZONE_RESET_ALL = 8,
/* write the zero filled sector many times */
REQ_OP_WRITE_ZEROES = 9,
+ /* Open a zone */
+ REQ_OP_ZONE_OPEN = 10,
+ /* Close a zone */
+ REQ_OP_ZONE_CLOSE = 11,
+ /* Transition a zone to full */
+ REQ_OP_ZONE_FINISH = 12,
/* SCSI passthrough using struct scsi_request */
REQ_OP_SCSI_IN = 32,
@@ -417,6 +423,25 @@ static inline bool op_is_discard(unsigned int op)
return (op & REQ_OP_MASK) == REQ_OP_DISCARD;
}
+/*
+ * Check if a bio or request operation is a zone management operation, with
+ * the exception of REQ_OP_ZONE_RESET_ALL which is treated as a special case
+ * due to its different handling in the block layer and device response in
+ * case of command failure.
+ */
+static inline bool op_is_zone_mgmt(enum req_opf op)
+{
+ switch (op & REQ_OP_MASK) {
+ case REQ_OP_ZONE_RESET:
+ case REQ_OP_ZONE_OPEN:
+ case REQ_OP_ZONE_CLOSE:
+ case REQ_OP_ZONE_FINISH:
+ return true;
+ default:
+ return false;
+ }
+}
+
static inline int op_stat_group(unsigned int op)
{
if (op_is_discard(op))
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f3ea78b0c91c..6a4f7abbdcf7 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -360,14 +360,15 @@ extern unsigned int blkdev_nr_zones(struct block_device *bdev);
extern int blkdev_report_zones(struct block_device *bdev,
sector_t sector, struct blk_zone *zones,
unsigned int *nr_zones);
-extern int blkdev_reset_zones(struct block_device *bdev, sector_t sectors,
- sector_t nr_sectors, gfp_t gfp_mask);
+extern int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
+ sector_t sectors, sector_t nr_sectors,
+ gfp_t gfp_mask);
extern int blk_revalidate_disk_zones(struct gendisk *disk);
extern int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg);
-extern int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
- unsigned int cmd, unsigned long arg);
+extern int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long arg);
#else /* CONFIG_BLK_DEV_ZONED */
@@ -388,9 +389,9 @@ static inline int blkdev_report_zones_ioctl(struct block_device *bdev,
return -ENOTTY;
}
-static inline int blkdev_reset_zones_ioctl(struct block_device *bdev,
- fmode_t mode, unsigned int cmd,
- unsigned long arg)
+static inline int blkdev_zone_mgmt_ioctl(struct block_device *bdev,
+ fmode_t mode, unsigned int cmd,
+ unsigned long arg)
{
return -ENOTTY;
}
@@ -411,7 +412,6 @@ struct request_queue {
/* sw queues */
struct blk_mq_ctx __percpu *queue_ctx;
- unsigned int nr_queues;
unsigned int queue_depth;
diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h
index 53c28d750a45..1ac0d712a9c3 100644
--- a/include/linux/sed-opal.h
+++ b/include/linux/sed-opal.h
@@ -42,6 +42,7 @@ static inline bool is_sed_ioctl(unsigned int cmd)
case IOC_OPAL_PSID_REVERT_TPR:
case IOC_OPAL_MBR_DONE:
case IOC_OPAL_WRITE_SHADOW_MBR:
+ case IOC_OPAL_GENERIC_TABLE_RW:
return true;
}
return false;
diff --git a/include/trace/events/wbt.h b/include/trace/events/wbt.h
index b048694070e2..37342a13c9cb 100644
--- a/include/trace/events/wbt.h
+++ b/include/trace/events/wbt.h
@@ -33,7 +33,8 @@ TRACE_EVENT(wbt_stat,
),
TP_fast_assign(
- strncpy(__entry->name, dev_name(bdi->dev), 32);
+ strlcpy(__entry->name, dev_name(bdi->dev),
+ ARRAY_SIZE(__entry->name));
__entry->rmean = stat[0].mean;
__entry->rmin = stat[0].min;
__entry->rmax = stat[0].max;
@@ -67,7 +68,8 @@ TRACE_EVENT(wbt_lat,
),
TP_fast_assign(
- strncpy(__entry->name, dev_name(bdi->dev), 32);
+ strlcpy(__entry->name, dev_name(bdi->dev),
+ ARRAY_SIZE(__entry->name));
__entry->lat = div_u64(lat, 1000);
),
@@ -103,7 +105,8 @@ TRACE_EVENT(wbt_step,
),
TP_fast_assign(
- strncpy(__entry->name, dev_name(bdi->dev), 32);
+ strlcpy(__entry->name, dev_name(bdi->dev),
+ ARRAY_SIZE(__entry->name));
__entry->msg = msg;
__entry->step = step;
__entry->window = div_u64(window, 1000);
@@ -138,7 +141,8 @@ TRACE_EVENT(wbt_timer,
),
TP_fast_assign(
- strncpy(__entry->name, dev_name(bdi->dev), 32);
+ strlcpy(__entry->name, dev_name(bdi->dev),
+ ARRAY_SIZE(__entry->name));
__entry->status = status;
__entry->step = step;
__entry->inflight = inflight;
diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h
index 498eec813494..0cdef67135f0 100644
--- a/include/uapi/linux/blkzoned.h
+++ b/include/uapi/linux/blkzoned.h
@@ -120,9 +120,11 @@ struct blk_zone_report {
};
/**
- * struct blk_zone_range - BLKRESETZONE ioctl request
- * @sector: starting sector of the first zone to issue reset write pointer
- * @nr_sectors: Total number of sectors of 1 or more zones to reset
+ * struct blk_zone_range - BLKRESETZONE/BLKOPENZONE/
+ * BLKCLOSEZONE/BLKFINISHZONE ioctl
+ * requests
+ * @sector: Starting sector of the first zone to operate on.
+ * @nr_sectors: Total number of sectors of all zones to operate on.
*/
struct blk_zone_range {
__u64 sector;
@@ -139,10 +141,19 @@ struct blk_zone_range {
* sector range. The sector range must be zone aligned.
* @BLKGETZONESZ: Get the device zone size in number of 512 B sectors.
* @BLKGETNRZONES: Get the total number of zones of the device.
+ * @BLKOPENZONE: Open the zones in the specified sector range.
+ * The 512 B sector range must be zone aligned.
+ * @BLKCLOSEZONE: Close the zones in the specified sector range.
+ * The 512 B sector range must be zone aligned.
+ * @BLKFINISHZONE: Mark the zones as full in the specified sector range.
+ * The 512 B sector range must be zone aligned.
*/
#define BLKREPORTZONE _IOWR(0x12, 130, struct blk_zone_report)
#define BLKRESETZONE _IOW(0x12, 131, struct blk_zone_range)
#define BLKGETZONESZ _IOR(0x12, 132, __u32)
#define BLKGETNRZONES _IOR(0x12, 133, __u32)
+#define BLKOPENZONE _IOW(0x12, 134, struct blk_zone_range)
+#define BLKCLOSEZONE _IOW(0x12, 135, struct blk_zone_range)
+#define BLKFINISHZONE _IOW(0x12, 136, struct blk_zone_range)
#endif /* _UAPI_BLKZONED_H */
diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
index 1d338357df8a..1f97b33c840e 100644
--- a/include/uapi/linux/fcntl.h
+++ b/include/uapi/linux/fcntl.h
@@ -58,7 +58,7 @@
* Valid hint values for F_{GET,SET}_RW_HINT. 0 is "not set", or can be
* used to clear any hints previously set.
*/
-#define RWF_WRITE_LIFE_NOT_SET 0
+#define RWH_WRITE_LIFE_NOT_SET 0
#define RWH_WRITE_LIFE_NONE 1
#define RWH_WRITE_LIFE_SHORT 2
#define RWH_WRITE_LIFE_MEDIUM 3
@@ -66,6 +66,13 @@
#define RWH_WRITE_LIFE_EXTREME 5
/*
+ * The originally introduced spelling is retained from the first
+ * versions of the patch set that introduced the feature, see commit
+ * v4.13-rc1~212^2~51.
+ */
+#define RWF_WRITE_LIFE_NOT_SET RWH_WRITE_LIFE_NOT_SET
+
+/*
* Types of directory notifications that may be requested.
*/
#define DN_ACCESS 0x00000001 /* File accessed */
diff --git a/include/uapi/linux/sed-opal.h b/include/uapi/linux/sed-opal.h
index c6d035fa1b6c..6f5af1a84213 100644
--- a/include/uapi/linux/sed-opal.h
+++ b/include/uapi/linux/sed-opal.h
@@ -113,6 +113,25 @@ struct opal_shadow_mbr {
__u64 size;
};
+/* Opal table operations */
+enum opal_table_ops {
+ OPAL_READ_TABLE,
+ OPAL_WRITE_TABLE,
+};
+
+#define OPAL_UID_LENGTH 8
+struct opal_read_write_table {
+ struct opal_key key;
+ const __u64 data;
+ const __u8 table_uid[OPAL_UID_LENGTH];
+ __u64 offset;
+ __u64 size;
+#define OPAL_TABLE_READ (1 << OPAL_READ_TABLE)
+#define OPAL_TABLE_WRITE (1 << OPAL_WRITE_TABLE)
+ __u64 flags;
+ __u64 priv;
+};
+
#define IOC_OPAL_SAVE _IOW('p', 220, struct opal_lock_unlock)
#define IOC_OPAL_LOCK_UNLOCK _IOW('p', 221, struct opal_lock_unlock)
#define IOC_OPAL_TAKE_OWNERSHIP _IOW('p', 222, struct opal_key)
@@ -128,5 +147,6 @@ struct opal_shadow_mbr {
#define IOC_OPAL_PSID_REVERT_TPR _IOW('p', 232, struct opal_key)
#define IOC_OPAL_MBR_DONE _IOW('p', 233, struct opal_mbr_done)
#define IOC_OPAL_WRITE_SHADOW_MBR _IOW('p', 234, struct opal_shadow_mbr)
+#define IOC_OPAL_GENERIC_TABLE_RW _IOW('p', 235, struct opal_read_write_table)
#endif /* _UAPI_SED_OPAL_H */
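For completeness, a hedged userspace sketch of the new generic table interface. The device node, passphrase, and 512-byte read are illustrative only; the table UID bytes mirror the OPAL_DATASTORE entry added in sed-opal.c above, and struct opal_key is assumed to carry the passphrase in its key/key_len members as in the existing UAPI header:

	#include <fcntl.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/sed-opal.h>

	/* Read 512 bytes from the start of the DataStore table. */
	int main(void)
	{
		unsigned char buf[512];
		struct opal_read_write_table rw = {
			/* DataStore table UID, per the OPAL_DATASTORE entry */
			.table_uid = { 0x00, 0x00, 0x10, 0x01,
				       0x00, 0x00, 0x00, 0x00 },
			.data	= (__u64)(uintptr_t)buf,
			.offset	= 0,
			.size	= sizeof(buf),
			.flags	= OPAL_TABLE_READ,	/* 1 << OPAL_READ_TABLE */
		};
		int fd = open("/dev/nvme0n1", O_RDWR);

		/* Admin1 passphrase; "password" is a placeholder. */
		rw.key.key_len = strlen("password");
		memcpy(rw.key.key, "password", rw.key.key_len);

		if (fd < 0 || ioctl(fd, IOC_OPAL_GENERIC_TABLE_RW, &rw) < 0) {
			perror("IOC_OPAL_GENERIC_TABLE_RW");
			return 1;
		}
		return 0;
	}

Only one of OPAL_TABLE_READ and OPAL_TABLE_WRITE is honored per call: sed-opal.c dispatches on the highest set flag bit (fls64(flags) - 1), as the opal_generic_read_write_table hunk above shows.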
diff --git a/tools/include/uapi/linux/fcntl.h b/tools/include/uapi/linux/fcntl.h
index 1d338357df8a..1f97b33c840e 100644
--- a/tools/include/uapi/linux/fcntl.h
+++ b/tools/include/uapi/linux/fcntl.h
@@ -58,7 +58,7 @@
* Valid hint values for F_{GET,SET}_RW_HINT. 0 is "not set", or can be
* used to clear any hints previously set.
*/
-#define RWF_WRITE_LIFE_NOT_SET 0
+#define RWH_WRITE_LIFE_NOT_SET 0
#define RWH_WRITE_LIFE_NONE 1
#define RWH_WRITE_LIFE_SHORT 2
#define RWH_WRITE_LIFE_MEDIUM 3
@@ -66,6 +66,13 @@
#define RWH_WRITE_LIFE_EXTREME 5
/*
+ * The originally introduced spelling is retained from the first
+ * versions of the patch set that introduced the feature, see commit
+ * v4.13-rc1~212^2~51.
+ */
+#define RWF_WRITE_LIFE_NOT_SET RWH_WRITE_LIFE_NOT_SET
+
+/*
* Types of directory notifications that may be requested.
*/
#define DN_ACCESS 0x00000001 /* File accessed */