summaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r--block/Kconfig13
-rw-r--r--block/bio.c9
-rw-r--r--block/blk-cgroup.c9
-rw-r--r--block/blk-core.c6
-rw-r--r--block/blk-map.c91
-rw-r--r--block/blk-merge.c34
-rw-r--r--block/blk-mq.c6
-rw-r--r--block/blk-settings.c4
-rw-r--r--block/blk-sysfs.c5
-rw-r--r--block/deadline-iosched.c3
-rw-r--r--block/ioctl.c38
-rw-r--r--block/partition-generic.c29
12 files changed, 148 insertions, 99 deletions
diff --git a/block/Kconfig b/block/Kconfig
index 161491d0a879..0363cd731320 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -88,6 +88,19 @@ config BLK_DEV_INTEGRITY
T10/SCSI Data Integrity Field or the T13/ATA External Path
Protection. If in doubt, say N.
+config BLK_DEV_DAX
+ bool "Block device DAX support"
+ depends on FS_DAX
+ depends on BROKEN
+ help
+ When DAX support is available (CONFIG_FS_DAX) raw block
+ devices can also support direct userspace access to the
+ storage capacity via MMAP(2) similar to a file on a
+ DAX-enabled filesystem. However, the DAX I/O-path disables
+ some standard I/O-statistics, and the MMAP(2) path has some
+ operational differences due to bypassing the page
+ cache. If in doubt, say N.
+
config BLK_DEV_THROTTLING
bool "Block layer bio throttling support"
depends on BLK_CGROUP=y
diff --git a/block/bio.c b/block/bio.c
index dbabd48b1934..cf7591551b17 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -874,7 +874,7 @@ int submit_bio_wait(int rw, struct bio *bio)
bio->bi_private = &ret;
bio->bi_end_io = submit_bio_wait_endio;
submit_bio(rw, bio);
- wait_for_completion(&ret.event);
+ wait_for_completion_io(&ret.event);
return ret.error;
}
@@ -1090,9 +1090,12 @@ int bio_uncopy_user(struct bio *bio)
if (!bio_flagged(bio, BIO_NULL_MAPPED)) {
/*
* if we're in a workqueue, the request is orphaned, so
- * don't copy into a random user address space, just free.
+ * don't copy into a random user address space, just free
+ * and return -EINTR so user space doesn't expect any data.
*/
- if (current->mm && bio_data_dir(bio) == READ)
+ if (!current->mm)
+ ret = -EINTR;
+ else if (bio_data_dir(bio) == READ)
ret = bio_copy_to_iter(bio, bmd->iter);
if (bmd->is_our_pages)
bio_free_pages(bio);
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 5a37188b559f..66e6f1aae02e 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -788,6 +788,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
{
struct gendisk *disk;
struct blkcg_gq *blkg;
+ struct module *owner;
unsigned int major, minor;
int key_len, part, ret;
char *body;
@@ -804,7 +805,9 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
if (!disk)
return -ENODEV;
if (part) {
+ owner = disk->fops->owner;
put_disk(disk);
+ module_put(owner);
return -ENODEV;
}
@@ -820,7 +823,9 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
ret = PTR_ERR(blkg);
rcu_read_unlock();
spin_unlock_irq(disk->queue->queue_lock);
+ owner = disk->fops->owner;
put_disk(disk);
+ module_put(owner);
/*
* If queue was bypassing, we should retry. Do so after a
* short msleep(). It isn't strictly necessary but queue
@@ -851,9 +856,13 @@ EXPORT_SYMBOL_GPL(blkg_conf_prep);
void blkg_conf_finish(struct blkg_conf_ctx *ctx)
__releases(ctx->disk->queue->queue_lock) __releases(rcu)
{
+ struct module *owner;
+
spin_unlock_irq(ctx->disk->queue->queue_lock);
rcu_read_unlock();
+ owner = ctx->disk->fops->owner;
put_disk(ctx->disk);
+ module_put(owner);
}
EXPORT_SYMBOL_GPL(blkg_conf_finish);
diff --git a/block/blk-core.c b/block/blk-core.c
index ab51685988c2..b83d29755b5a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2455,14 +2455,16 @@ struct request *blk_peek_request(struct request_queue *q)
rq = NULL;
break;
- } else if (ret == BLKPREP_KILL) {
+ } else if (ret == BLKPREP_KILL || ret == BLKPREP_INVALID) {
+ int err = (ret == BLKPREP_INVALID) ? -EREMOTEIO : -EIO;
+
rq->cmd_flags |= REQ_QUIET;
/*
* Mark this request as started so we don't trigger
* any debug logic in the end I/O path.
*/
blk_start_request(rq);
- __blk_end_request_all(rq, -EIO);
+ __blk_end_request_all(rq, err);
} else {
printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);
break;
diff --git a/block/blk-map.c b/block/blk-map.c
index f565e11f465a..a54f0543b956 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -57,6 +57,49 @@ static int __blk_rq_unmap_user(struct bio *bio)
return ret;
}
+static int __blk_rq_map_user_iov(struct request *rq,
+ struct rq_map_data *map_data, struct iov_iter *iter,
+ gfp_t gfp_mask, bool copy)
+{
+ struct request_queue *q = rq->q;
+ struct bio *bio, *orig_bio;
+ int ret;
+
+ if (copy)
+ bio = bio_copy_user_iov(q, map_data, iter, gfp_mask);
+ else
+ bio = bio_map_user_iov(q, iter, gfp_mask);
+
+ if (IS_ERR(bio))
+ return PTR_ERR(bio);
+
+ if (map_data && map_data->null_mapped)
+ bio_set_flag(bio, BIO_NULL_MAPPED);
+
+ iov_iter_advance(iter, bio->bi_iter.bi_size);
+ if (map_data)
+ map_data->offset += bio->bi_iter.bi_size;
+
+ orig_bio = bio;
+ blk_queue_bounce(q, &bio);
+
+ /*
+ * We link the bounce buffer in and could have to traverse it
+ * later so we have to get a ref to prevent it from being freed
+ */
+ bio_get(bio);
+
+ ret = blk_rq_append_bio(q, rq, bio);
+ if (ret) {
+ bio_endio(bio);
+ __blk_rq_unmap_user(orig_bio);
+ bio_put(bio);
+ return ret;
+ }
+
+ return 0;
+}
+
/**
* blk_rq_map_user_iov - map user data to a request, for REQ_TYPE_BLOCK_PC usage
* @q: request queue where request should be inserted
@@ -82,10 +125,11 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
struct rq_map_data *map_data,
const struct iov_iter *iter, gfp_t gfp_mask)
{
- struct bio *bio;
- int unaligned = 0;
- struct iov_iter i;
struct iovec iov, prv = {.iov_base = NULL, .iov_len = 0};
+ bool copy = (q->dma_pad_mask & iter->count) || map_data;
+ struct bio *bio = NULL;
+ struct iov_iter i;
+ int ret;
if (!iter || !iter->count)
return -EINVAL;
@@ -101,42 +145,29 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
*/
if ((uaddr & queue_dma_alignment(q)) ||
iovec_gap_to_prv(q, &prv, &iov))
- unaligned = 1;
+ copy = true;
prv.iov_base = iov.iov_base;
prv.iov_len = iov.iov_len;
}
- if (unaligned || (q->dma_pad_mask & iter->count) || map_data)
- bio = bio_copy_user_iov(q, map_data, iter, gfp_mask);
- else
- bio = bio_map_user_iov(q, iter, gfp_mask);
-
- if (IS_ERR(bio))
- return PTR_ERR(bio);
-
- if (map_data && map_data->null_mapped)
- bio_set_flag(bio, BIO_NULL_MAPPED);
-
- if (bio->bi_iter.bi_size != iter->count) {
- /*
- * Grab an extra reference to this bio, as bio_unmap_user()
- * expects to be able to drop it twice as it happens on the
- * normal IO completion path
- */
- bio_get(bio);
- bio_endio(bio);
- __blk_rq_unmap_user(bio);
- return -EINVAL;
- }
+ i = *iter;
+ do {
+ ret =__blk_rq_map_user_iov(rq, map_data, &i, gfp_mask, copy);
+ if (ret)
+ goto unmap_rq;
+ if (!bio)
+ bio = rq->bio;
+ } while (iov_iter_count(&i));
if (!bio_flagged(bio, BIO_USER_MAPPED))
rq->cmd_flags |= REQ_COPY_USER;
-
- blk_queue_bounce(q, &bio);
- bio_get(bio);
- blk_rq_bio_prep(q, rq, bio);
return 0;
+
+unmap_rq:
+ __blk_rq_unmap_user(bio);
+ rq->bio = NULL;
+ return -EINVAL;
}
EXPORT_SYMBOL(blk_rq_map_user_iov);
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 1699df5b0493..261353166dcf 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -70,6 +70,18 @@ static struct bio *blk_bio_write_same_split(struct request_queue *q,
return bio_split(bio, q->limits.max_write_same_sectors, GFP_NOIO, bs);
}
+static inline unsigned get_max_io_size(struct request_queue *q,
+ struct bio *bio)
+{
+ unsigned sectors = blk_max_size_offset(q, bio->bi_iter.bi_sector);
+ unsigned mask = queue_logical_block_size(q) - 1;
+
+ /* aligned to logical block size */
+ sectors &= ~(mask >> 9);
+
+ return sectors;
+}
+
static struct bio *blk_bio_segment_split(struct request_queue *q,
struct bio *bio,
struct bio_set *bs,
@@ -81,6 +93,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
unsigned front_seg_size = bio->bi_seg_front_size;
bool do_split = true;
struct bio *new = NULL;
+ const unsigned max_sectors = get_max_io_size(q, bio);
bio_for_each_segment(bv, bio, iter) {
/*
@@ -90,20 +103,19 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
if (bvprvp && bvec_gap_to_prev(q, bvprvp, bv.bv_offset))
goto split;
- if (sectors + (bv.bv_len >> 9) >
- blk_max_size_offset(q, bio->bi_iter.bi_sector)) {
+ if (sectors + (bv.bv_len >> 9) > max_sectors) {
/*
* Consider this a new segment if we're splitting in
* the middle of this vector.
*/
if (nsegs < queue_max_segments(q) &&
- sectors < blk_max_size_offset(q,
- bio->bi_iter.bi_sector)) {
+ sectors < max_sectors) {
nsegs++;
- sectors = blk_max_size_offset(q,
- bio->bi_iter.bi_sector);
+ sectors = max_sectors;
}
- goto split;
+ if (sectors)
+ goto split;
+ /* Make this single bvec as the 1st segment */
}
if (bvprvp && blk_queue_cluster(q)) {
@@ -292,7 +304,6 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
struct bio *nxt)
{
struct bio_vec end_bv = { NULL }, nxt_bv;
- struct bvec_iter iter;
if (!blk_queue_cluster(q))
return 0;
@@ -304,11 +315,8 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
if (!bio_has_data(bio))
return 1;
- bio_for_each_segment(end_bv, bio, iter)
- if (end_bv.bv_len == iter.bi_size)
- break;
-
- nxt_bv = bio_iovec(nxt);
+ bio_get_last_bvec(bio, &end_bv);
+ bio_get_first_bvec(nxt, &nxt_bv);
if (!BIOVEC_PHYS_MERGEABLE(&end_bv, &nxt_bv))
return 0;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 4c0622fae413..56c0a726b619 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -599,8 +599,10 @@ static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
* If a request wasn't started before the queue was
* marked dying, kill it here or it'll go unnoticed.
*/
- if (unlikely(blk_queue_dying(rq->q)))
- blk_mq_complete_request(rq, -EIO);
+ if (unlikely(blk_queue_dying(rq->q))) {
+ rq->errors = -EIO;
+ blk_mq_end_request(rq, rq->errors);
+ }
return;
}
diff --git a/block/blk-settings.c b/block/blk-settings.c
index dd4973583978..c7bb666aafd1 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -91,8 +91,8 @@ void blk_set_default_limits(struct queue_limits *lim)
lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
lim->virt_boundary_mask = 0;
lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
- lim->max_sectors = lim->max_dev_sectors = lim->max_hw_sectors =
- BLK_SAFE_MAX_SECTORS;
+ lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS;
+ lim->max_dev_sectors = 0;
lim->chunk_sectors = 0;
lim->max_write_same_sectors = 0;
lim->max_discard_sectors = 0;
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index e140cc487ce1..dd93763057ce 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -147,10 +147,9 @@ static ssize_t queue_discard_granularity_show(struct request_queue *q, char *pag
static ssize_t queue_discard_max_hw_show(struct request_queue *q, char *page)
{
- unsigned long long val;
- val = q->limits.max_hw_discard_sectors << 9;
- return sprintf(page, "%llu\n", val);
+ return sprintf(page, "%llu\n",
+ (unsigned long long)q->limits.max_hw_discard_sectors << 9);
}
static ssize_t queue_discard_max_show(struct request_queue *q, char *page)
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index a753df2b3fc2..d0dd7882d8c7 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -39,7 +39,6 @@ struct deadline_data {
*/
struct request *next_rq[2];
unsigned int batching; /* number of sequential requests made */
- sector_t last_sector; /* head position */
unsigned int starved; /* times reads have starved writes */
/*
@@ -210,8 +209,6 @@ deadline_move_request(struct deadline_data *dd, struct request *rq)
dd->next_rq[WRITE] = NULL;
dd->next_rq[data_dir] = deadline_latter_request(rq);
- dd->last_sector = rq_end_sector(rq);
-
/*
* take it off the sort and fifo list, move
* to dispatch queue
diff --git a/block/ioctl.c b/block/ioctl.c
index 77f5d17779d6..d8996bbd7f12 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -434,42 +434,6 @@ bool blkdev_dax_capable(struct block_device *bdev)
return true;
}
-
-static int blkdev_daxset(struct block_device *bdev, unsigned long argp)
-{
- unsigned long arg;
- int rc = 0;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
-
- if (get_user(arg, (int __user *)(argp)))
- return -EFAULT;
- arg = !!arg;
- if (arg == !!(bdev->bd_inode->i_flags & S_DAX))
- return 0;
-
- if (arg)
- arg = S_DAX;
-
- if (arg && !blkdev_dax_capable(bdev))
- return -ENOTTY;
-
- inode_lock(bdev->bd_inode);
- if (bdev->bd_map_count == 0)
- inode_set_flags(bdev->bd_inode, arg, S_DAX);
- else
- rc = -EBUSY;
- inode_unlock(bdev->bd_inode);
- return rc;
-}
-#else
-static int blkdev_daxset(struct block_device *bdev, int arg)
-{
- if (arg)
- return -ENOTTY;
- return 0;
-}
#endif
static int blkdev_flushbuf(struct block_device *bdev, fmode_t mode,
@@ -634,8 +598,6 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
case BLKTRACESETUP:
case BLKTRACETEARDOWN:
return blk_trace_ioctl(bdev, cmd, argp);
- case BLKDAXSET:
- return blkdev_daxset(bdev, arg);
case BLKDAXGET:
return put_int(arg, !!(bdev->bd_inode->i_flags & S_DAX));
break;
diff --git a/block/partition-generic.c b/block/partition-generic.c
index 746935a5973c..5d8701941054 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -16,6 +16,7 @@
#include <linux/kmod.h>
#include <linux/ctype.h>
#include <linux/genhd.h>
+#include <linux/dax.h>
#include <linux/blktrace_api.h>
#include "partitions/check.h"
@@ -216,10 +217,21 @@ static void part_release(struct device *dev)
kfree(p);
}
+static int part_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+ struct hd_struct *part = dev_to_part(dev);
+
+ add_uevent_var(env, "PARTN=%u", part->partno);
+ if (part->info && part->info->volname[0])
+ add_uevent_var(env, "PARTNAME=%s", part->info->volname);
+ return 0;
+}
+
struct device_type part_type = {
.name = "partition",
.groups = part_attr_groups,
.release = part_release,
+ .uevent = part_uevent,
};
static void delete_partition_rcu_cb(struct rcu_head *head)
@@ -550,13 +562,24 @@ int invalidate_partitions(struct gendisk *disk, struct block_device *bdev)
return 0;
}
-unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
+static struct page *read_pagecache_sector(struct block_device *bdev, sector_t n)
{
struct address_space *mapping = bdev->bd_inode->i_mapping;
+
+ return read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)),
+ NULL);
+}
+
+unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
+{
struct page *page;
- page = read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)),
- NULL);
+ /* don't populate page cache for dax capable devices */
+ if (IS_DAX(bdev->bd_inode))
+ page = read_dax_sector(bdev, n);
+ else
+ page = read_pagecache_sector(bdev, n);
+
if (!IS_ERR(page)) {
if (PageError(page))
goto fail;