From dd22f551ac0ad366f92f601835f6623b83adc331 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 7 Jan 2015 18:05:34 +0200 Subject: block: Change direct_access calling convention In order to support accesses to larger chunks of memory, pass in a 'size' parameter (counted in bytes), and return the amount available at that address. Add a new helper function, bdev_direct_access(), to handle common functionality including partition handling, checking the length requested is positive, checking for the sector being page-aligned, and checking the length of the request does not pass the end of the partition. Signed-off-by: Matthew Wilcox Reviewed-by: Jan Kara Reviewed-by: Boaz Harrosh Signed-off-by: Jens Axboe --- drivers/block/brd.c | 14 +++++++------- drivers/s390/block/dcssblk.c | 21 +++++++++------------ 2 files changed, 16 insertions(+), 19 deletions(-) (limited to 'drivers') diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 3598110d2cef..89e90ec52f28 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -370,25 +370,25 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector, } #ifdef CONFIG_BLK_DEV_XIP -static int brd_direct_access(struct block_device *bdev, sector_t sector, - void **kaddr, unsigned long *pfn) +static long brd_direct_access(struct block_device *bdev, sector_t sector, + void **kaddr, unsigned long *pfn, long size) { struct brd_device *brd = bdev->bd_disk->private_data; struct page *page; if (!brd) return -ENODEV; - if (sector & (PAGE_SECTORS-1)) - return -EINVAL; - if (sector + PAGE_SECTORS > get_capacity(bdev->bd_disk)) - return -ERANGE; page = brd_insert_page(brd, sector); if (!page) return -ENOSPC; *kaddr = page_address(page); *pfn = page_to_pfn(page); - return 0; + /* + * TODO: If size > PAGE_SIZE, we could look to see if the next page in + * the file happens to be mapped to the next page of physical RAM. + */ + return PAGE_SIZE; } #endif diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index b550c8c8d010..31d6884f3351 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -28,8 +28,8 @@ static int dcssblk_open(struct block_device *bdev, fmode_t mode); static void dcssblk_release(struct gendisk *disk, fmode_t mode); static void dcssblk_make_request(struct request_queue *q, struct bio *bio); -static int dcssblk_direct_access(struct block_device *bdev, sector_t secnum, - void **kaddr, unsigned long *pfn); +static long dcssblk_direct_access(struct block_device *bdev, sector_t secnum, + void **kaddr, unsigned long *pfn, long size); static char dcssblk_segments[DCSSBLK_PARM_LEN] = "\0"; @@ -866,25 +866,22 @@ fail: bio_io_error(bio); } -static int +static long dcssblk_direct_access (struct block_device *bdev, sector_t secnum, - void **kaddr, unsigned long *pfn) + void **kaddr, unsigned long *pfn, long size) { struct dcssblk_dev_info *dev_info; - unsigned long pgoff; + unsigned long offset, dev_sz; dev_info = bdev->bd_disk->private_data; if (!dev_info) return -ENODEV; - if (secnum % (PAGE_SIZE/512)) - return -EINVAL; - pgoff = secnum / (PAGE_SIZE / 512); - if ((pgoff+1)*PAGE_SIZE-1 > dev_info->end - dev_info->start) - return -ERANGE; - *kaddr = (void *) (dev_info->start+pgoff*PAGE_SIZE); + dev_sz = dev_info->end - dev_info->start; + offset = secnum * 512; + *kaddr = (void *) (dev_info->start + offset); *pfn = virt_to_phys(*kaddr) >> PAGE_SHIFT; - return 0; + return dev_sz - offset; } static void -- cgit From d93ba7a5a97c9f315bacdcdb8de4e5f368e7b396 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Tue, 20 Jan 2015 20:06:30 -0500 Subject: block: Add discard flag to blkdev_issue_zeroout() function blkdev_issue_discard() will zero a given block range. This is done by way of explicit writing, thus provisioning or allocating the blocks on disk. There are use cases where the desired behavior is to zero the blocks but unprovision them if possible. The blocks must deterministically contain zeroes when they are subsequently read back. This patch adds a flag to blkdev_issue_zeroout() that provides this variant. If the discard flag is set and a block device guarantees discard_zeroes_data we will use REQ_DISCARD to clear the block range. If the device does not support discard_zeroes_data or if the discard request fails we will fall back to first REQ_WRITE_SAME and then a regular REQ_WRITE. Also update the callers of blkdev_issue_zero() to reflect the new flag and make sb_issue_zeroout() prefer the discard approach. Signed-off-by: Martin K. Petersen Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_receiver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index d169b4a79267..cee20354ac37 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1388,7 +1388,7 @@ int drbd_submit_peer_request(struct drbd_device *device, list_add_tail(&peer_req->w.list, &device->active_ee); spin_unlock_irq(&device->resource->req_lock); if (blkdev_issue_zeroout(device->ldev->backing_bdev, - sector, data_size >> 9, GFP_NOIO)) + sector, data_size >> 9, GFP_NOIO, false)) peer_req->flags |= EE_WAS_ERROR; drbd_endio_write_sec_final(peer_req); return 0; -- cgit From ee1b6f7aff94019c09e73837054979063f722046 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 15 Jan 2015 17:32:25 -0800 Subject: block: support different tag allocation policy The libata tag allocation is using a round-robin policy. Next patch will make libata use block generic tag allocation, so let's add a policy to tag allocation. Currently two policies: FIFO (default) and round-robin. Cc: Jens Axboe Cc: Tejun Heo Cc: Christoph Hellwig Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- drivers/block/osdblk.c | 2 +- drivers/scsi/scsi_scan.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c index 79aa179305b5..e22942596207 100644 --- a/drivers/block/osdblk.c +++ b/drivers/block/osdblk.c @@ -423,7 +423,7 @@ static int osdblk_init_disk(struct osdblk_device *osdev) } /* switch queue to TCQ mode; allocate tag map */ - rc = blk_queue_init_tags(q, OSDBLK_MAX_REQ, NULL); + rc = blk_queue_init_tags(q, OSDBLK_MAX_REQ, NULL, BLK_TAG_ALLOC_FIFO); if (rc) { blk_cleanup_queue(q); put_disk(disk); diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 983aed10ff2f..921a8c897eb2 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -290,7 +290,8 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget, if (!shost_use_blk_mq(sdev->host) && (shost->bqt || shost->hostt->use_blk_tags)) { blk_queue_init_tags(sdev->request_queue, - sdev->host->cmd_per_lun, shost->bqt); + sdev->host->cmd_per_lun, shost->bqt, + shost->hostt->tag_alloc_policy); } scsi_change_queue_depth(sdev, sdev->host->cmd_per_lun); -- cgit From 24391c0dc57c3756a219defaa781e68637d6ab7d Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Fri, 23 Jan 2015 14:18:00 -0700 Subject: blk-mq: add tag allocation policy This is the blk-mq part to support tag allocation policy. The default allocation policy isn't changed (though it's not a strict FIFO). The new policy is round-robin for libata. But it's a try-best implementation. If multiple tasks are competing, the tags returned will be mixed (which is unavoidable even with !mq, as requests from different tasks can be mixed in queue) Cc: Jens Axboe Cc: Tejun Heo Cc: Christoph Hellwig Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- drivers/scsi/scsi_lib.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 9ea95dd3e260..49ab11508286 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -2188,6 +2188,8 @@ int scsi_mq_setup_tags(struct Scsi_Host *shost) shost->tag_set.cmd_size = cmd_size; shost->tag_set.numa_node = NUMA_NO_NODE; shost->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE; + shost->tag_set.flags |= + BLK_ALLOC_POLICY_TO_MQ_FLAG(shost->hostt->tag_alloc_policy); shost->tag_set.driver_data = shost; return blk_mq_alloc_tag_set(&shost->tag_set); -- cgit From febf71588c2a750e04dc2a8b0824ce120c48bd9e Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 17 Oct 2014 17:46:35 -0600 Subject: block: require blk_rq_prep_clone() be given an initialized clone request Prepare to allow blk_rq_prep_clone() to accept clone requests that were allocated from blk-mq request queues. As such the blk_rq_prep_clone() caller must first initialize the clone request. Signed-off-by: Keith Busch Signed-off-by: Mike Snitzer Signed-off-by: Jens Axboe --- drivers/md/dm.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index b98cd9d84435..f251633a51af 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1719,6 +1719,7 @@ static int setup_clone(struct request *clone, struct request *rq, { int r; + blk_rq_init(NULL, rq); r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC, dm_rq_bio_constructor, tio); if (r) -- cgit From 26e49cfc7e988a76bf1e55cef0d9e438e5489180 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 18 Jan 2015 16:16:31 +0100 Subject: block: pass iov_iter to the BLOCK_PC mapping functions Make use of a new interface provided by iov_iter, backed by scatter-gather list of iovec, instead of the old interface based on sg_iovec. Also use iov_iter_advance() instead of manual iteration. This commit should contain only literal replacements, without functional changes. Cc: Christoph Hellwig Cc: Jens Axboe Cc: Doug Gilbert Cc: "James E.J. Bottomley" Signed-off-by: Kent Overstreet [dpark: add more description in commit message] Signed-off-by: Dongsu Park [hch: fixed to do a deep clone of the iov_iter, and to properly use the iov_iter direction] Signed-off-by: Christoph Hellwig Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- drivers/scsi/sg.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index b14f64cb9724..4052e592818c 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1734,22 +1734,19 @@ sg_start_req(Sg_request *srp, unsigned char *cmd) } if (iov_count) { - int len, size = sizeof(struct sg_iovec) * iov_count; + int size = sizeof(struct iovec) * iov_count; struct iovec *iov; + struct iov_iter i; iov = memdup_user(hp->dxferp, size); if (IS_ERR(iov)) return PTR_ERR(iov); - len = iov_length(iov, iov_count); - if (hp->dxfer_len < len) { - iov_count = iov_shorten(iov, iov_count, hp->dxfer_len); - len = hp->dxfer_len; - } + iov_iter_init(&i, rw, iov, iov_count, + min_t(size_t, hp->dxfer_len, + iov_length(iov, iov_count))); - res = blk_rq_map_user_iov(q, rq, md, (struct sg_iovec *)iov, - iov_count, - len, GFP_ATOMIC); + res = blk_rq_map_user_iov(q, rq, md, &i, GFP_ATOMIC); kfree(iov); } else res = blk_rq_map_user(q, rq, md, hp->dxferp, -- cgit From db507b3ffd9b7a1c87e732ac6e2c3a5d0babb15a Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 9 Feb 2015 12:21:54 -0500 Subject: dm: fix multipath regression due to initializing wrong request Commit febf715 ("block: require blk_rq_prep_clone() be given an initialized clone request") introduced a regression by calling blk_rq_init() on the original request rather than the clone request that is passed to setup_clone(). Signed-off-by: Mike Snitzer Fixes: febf71588c2a ("block: require blk_rq_prep_clone() be given an initialized clone request") Signed-off-by: Jens Axboe --- drivers/md/dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index f251633a51af..71e6b73fe78d 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1719,7 +1719,7 @@ static int setup_clone(struct request *clone, struct request *rq, { int r; - blk_rq_init(NULL, rq); + blk_rq_init(NULL, clone); r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC, dm_rq_bio_constructor, tio); if (r) -- cgit