From b3cb0d6adc4bbc70b5e37e49a6068e973545ead7 Mon Sep 17 00:00:00 2001 From: Li Dongyang Date: Thu, 1 Sep 2011 18:39:10 +0800 Subject: xen-blkback: Implement discard requests ('feature-discard') ..aka ATA TRIM/SCSI UNMAP command to be passed through the frontend and used as appropiately by the backend. We also advertise certain granulity parameters to the frontend so it can plug them in. If the backend is a realy device - we just end up using 'blkdev_issue_discard' while for loopback devices - we just punch a hole in the image file. Signed-off-by: Li Dongyang [v1: Fixed up pr_debug and commit description] Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 86 +++++++++++++++++++++++++++++++------ 1 file changed, 72 insertions(+), 14 deletions(-) (limited to 'drivers/block/xen-blkback/blkback.c') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 2330a9ad5e95..9713d5a490e4 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -39,6 +39,9 @@ #include #include #include +#include +#include +#include #include #include @@ -258,13 +261,16 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id) static void print_stats(struct xen_blkif *blkif) { - pr_info("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d\n", + pr_info("xen-blkback (%s): oo %3d | rd %4d | wr %4d | f %4d" + " | ds %4d\n", current->comm, blkif->st_oo_req, - blkif->st_rd_req, blkif->st_wr_req, blkif->st_f_req); + blkif->st_rd_req, blkif->st_wr_req, + blkif->st_f_req, blkif->st_ds_req); blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000); blkif->st_rd_req = 0; blkif->st_wr_req = 0; blkif->st_oo_req = 0; + blkif->st_ds_req = 0; } int xen_blkif_schedule(void *arg) @@ -410,6 +416,42 @@ static int xen_blkbk_map(struct blkif_request *req, return ret; } +static void xen_blk_discard(struct xen_blkif *blkif, struct blkif_request *req) +{ + int err = 0; + int status = BLKIF_RSP_OKAY; + struct block_device *bdev = blkif->vbd.bdev; + + if (blkif->blk_backend_type == BLKIF_BACKEND_PHY) + /* just forward the discard request */ + err = blkdev_issue_discard(bdev, + req->u.discard.sector_number, + req->u.discard.nr_sectors, + GFP_KERNEL, 0); + else if (blkif->blk_backend_type == BLKIF_BACKEND_FILE) { + /* punch a hole in the backing file */ + struct loop_device *lo = bdev->bd_disk->private_data; + struct file *file = lo->lo_backing_file; + + if (file->f_op->fallocate) + err = file->f_op->fallocate(file, + FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, + req->u.discard.sector_number << 9, + req->u.discard.nr_sectors << 9); + else + err = -EOPNOTSUPP; + } else + err = -EOPNOTSUPP; + + if (err == -EOPNOTSUPP) { + pr_debug(DRV_PFX "discard op failed, not supported\n"); + status = BLKIF_RSP_EOPNOTSUPP; + } else if (err) + status = BLKIF_RSP_ERROR; + + make_response(blkif, req->id, req->operation, status); +} + /* * Completion callback on the bio's. Called as bh->b_end_io() */ @@ -563,6 +605,10 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, blkif->st_f_req++; operation = WRITE_FLUSH; break; + case BLKIF_OP_DISCARD: + blkif->st_ds_req++; + operation = REQ_DISCARD; + break; case BLKIF_OP_WRITE_BARRIER: default: operation = 0; /* make gcc happy */ @@ -572,7 +618,8 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, /* Check that the number of segments is sane. */ nseg = req->nr_segments; - if (unlikely(nseg == 0 && operation != WRITE_FLUSH) || + if (unlikely(nseg == 0 && operation != WRITE_FLUSH && + operation != REQ_DISCARD) || unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { pr_debug(DRV_PFX "Bad number of segments in request (%d)\n", nseg); @@ -627,10 +674,14 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, * the hypercall to unmap the grants - that is all done in * xen_blkbk_unmap. */ - if (xen_blkbk_map(req, pending_req, seg)) + if (operation != BLKIF_OP_DISCARD && + xen_blkbk_map(req, pending_req, seg)) goto fail_flush; - /* This corresponding xen_blkif_put is done in __end_block_io_op */ + /* + * This corresponding xen_blkif_put is done in __end_block_io_op, or + * below (in "!bio") if we are handling a BLKIF_OP_DISCARD. + */ xen_blkif_get(blkif); for (i = 0; i < nseg; i++) { @@ -654,18 +705,25 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, preq.sector_number += seg[i].nsec; } - /* This will be hit if the operation was a flush. */ + /* This will be hit if the operation was a flush or discard. */ if (!bio) { - BUG_ON(operation != WRITE_FLUSH); + BUG_ON(operation != WRITE_FLUSH && operation != REQ_DISCARD); - bio = bio_alloc(GFP_KERNEL, 0); - if (unlikely(bio == NULL)) - goto fail_put_bio; + if (operation == WRITE_FLUSH) { + bio = bio_alloc(GFP_KERNEL, 0); + if (unlikely(bio == NULL)) + goto fail_put_bio; - biolist[nbio++] = bio; - bio->bi_bdev = preq.bdev; - bio->bi_private = pending_req; - bio->bi_end_io = end_block_io_op; + biolist[nbio++] = bio; + bio->bi_bdev = preq.bdev; + bio->bi_private = pending_req; + bio->bi_end_io = end_block_io_op; + } else if (operation == REQ_DISCARD) { + xen_blk_discard(blkif, req); + xen_blkif_put(blkif); + free_req(pending_req); + return 0; + } } /* -- cgit From 8e6dc6fe51957116d363204a725c1262b4b78e19 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 16 Sep 2011 08:38:09 +0100 Subject: xen-blkback: use kzalloc() in favor of kmalloc()+memset() This fixes the problem of three of those four memset()-s having improper size arguments passed: Sizeof a pointer-typed expression returns the size of the pointer, not that of the pointed to data. It also reverts using kmalloc() instead of kzalloc() for the allocation of the pending grant handles array, as that array gets fully initialized in a subsequent loop. Reported-by: Julia Lawall Signed-off-by: Jan Beulich Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/block/xen-blkback/blkback.c') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 9713d5a490e4..e0dab614049c 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -823,9 +823,9 @@ static int __init xen_blkif_init(void) mmap_pages = xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST; - blkbk->pending_reqs = kmalloc(sizeof(blkbk->pending_reqs[0]) * + blkbk->pending_reqs = kzalloc(sizeof(blkbk->pending_reqs[0]) * xen_blkif_reqs, GFP_KERNEL); - blkbk->pending_grant_handles = kzalloc(sizeof(blkbk->pending_grant_handles[0]) * + blkbk->pending_grant_handles = kmalloc(sizeof(blkbk->pending_grant_handles[0]) * mmap_pages, GFP_KERNEL); blkbk->pending_pages = kzalloc(sizeof(blkbk->pending_pages[0]) * mmap_pages, GFP_KERNEL); @@ -848,8 +848,6 @@ static int __init xen_blkif_init(void) if (rc) goto failed_init; - memset(blkbk->pending_reqs, 0, sizeof(blkbk->pending_reqs)); - INIT_LIST_HEAD(&blkbk->pending_free); spin_lock_init(&blkbk->pending_free_lock); init_waitqueue_head(&blkbk->pending_free_wq); -- cgit From 29bde093787f3bdf7b9b4270ada6be7c8076e36b Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 10 Oct 2011 00:42:22 -0400 Subject: xen/blkback: Support 'feature-barrier' aka old-style BARRIER requests. We emulate the barrier requests by draining the outstanding bio's and then sending the WRITE_FLUSH command. To drain the I/Os we use the refcnt that is used during disconnect to wait for all the I/Os before disconnecting from the frontend. We latch on its value and if it reaches either the threshold for disconnect or when there are no more outstanding I/Os, then we have drained all I/Os. Suggested-by: Christopher Hellwig Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 37 +++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) (limited to 'drivers/block/xen-blkback/blkback.c') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index e0dab614049c..184b1335c8e9 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -452,6 +452,23 @@ static void xen_blk_discard(struct xen_blkif *blkif, struct blkif_request *req) make_response(blkif, req->id, req->operation, status); } +static void xen_blk_drain_io(struct xen_blkif *blkif) +{ + atomic_set(&blkif->drain, 1); + do { + wait_for_completion_interruptible_timeout( + &blkif->drain_complete, HZ); + + if (!atomic_read(&blkif->drain)) + break; + /* The initial value is one, and one refcnt taken at the + * start of the xen_blkif_schedule thread. */ + if (atomic_read(&blkif->refcnt) <= 2) + break; + } while (!kthread_should_stop()); + atomic_set(&blkif->drain, 0); +} + /* * Completion callback on the bio's. Called as bh->b_end_io() */ @@ -464,6 +481,11 @@ static void __end_block_io_op(struct pending_req *pending_req, int error) pr_debug(DRV_PFX "flush diskcache op failed, not supported\n"); xen_blkbk_flush_diskcache(XBT_NIL, pending_req->blkif->be, 0); pending_req->status = BLKIF_RSP_EOPNOTSUPP; + } else if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) && + (error == -EOPNOTSUPP)) { + pr_debug(DRV_PFX "write barrier op failed, not supported\n"); + xen_blkbk_barrier(XBT_NIL, pending_req->blkif->be, 0); + pending_req->status = BLKIF_RSP_EOPNOTSUPP; } else if (error) { pr_debug(DRV_PFX "Buffer not up-to-date at end of operation," " error=%d\n", error); @@ -481,6 +503,10 @@ static void __end_block_io_op(struct pending_req *pending_req, int error) pending_req->operation, pending_req->status); xen_blkif_put(pending_req->blkif); free_req(pending_req); + if (atomic_read(&pending_req->blkif->refcnt) <= 2) { + if (atomic_read(&pending_req->blkif->drain)) + complete(&pending_req->blkif->drain_complete); + } } } @@ -574,7 +600,6 @@ do_block_io_op(struct xen_blkif *blkif) return more_to_do; } - /* * Transmutation of the 'struct blkif_request' to a proper 'struct bio' * and call the 'submit_bio' to pass it to the underlying storage. @@ -591,6 +616,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, int i, nbio = 0; int operation; struct blk_plug plug; + bool drain = false; switch (req->operation) { case BLKIF_OP_READ: @@ -601,6 +627,8 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, blkif->st_wr_req++; operation = WRITE_ODIRECT; break; + case BLKIF_OP_WRITE_BARRIER: + drain = true; case BLKIF_OP_FLUSH_DISKCACHE: blkif->st_f_req++; operation = WRITE_FLUSH; @@ -609,7 +637,6 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, blkif->st_ds_req++; operation = REQ_DISCARD; break; - case BLKIF_OP_WRITE_BARRIER: default: operation = 0; /* make gcc happy */ goto fail_response; @@ -668,6 +695,12 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, } } + /* Wait on all outstanding I/O's and once that has been completed + * issue the WRITE_FLUSH. + */ + if (drain) + xen_blk_drain_io(pending_req->blkif); + /* * If we have failed at this point, we need to undo the M2P override, * set gnttab_set_unmap_op on all of the grant references and perform -- cgit From 5c62cb48602dba95159c81ffeca179d3852e25be Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 10 Oct 2011 12:33:21 -0400 Subject: xen/blkback: Report VBD_WSECT (wr_sect) properly. We did not increment the amount of sectors written to disk b/c we tested for the == WRITE which is incorrect - as the operations are more of WRITE_FLUSH, WRITE_ODIRECT. This patch fixes it by doing a & WRITE check. CC: stable@kernel.org Reported-by: Andy Burns Suggested-by: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/block/xen-blkback/blkback.c') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 184b1335c8e9..c15c559e8662 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -776,7 +776,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, if (operation == READ) blkif->st_rd_sect += preq.nr_sects; - else if (operation == WRITE || operation == WRITE_FLUSH) + else if (operation & WRITE) blkif->st_wr_sect += preq.nr_sects; return 0; -- cgit From 64391b2536ca92f9c589b2bfeaca3954896fe057 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 10 Oct 2011 00:47:49 -0400 Subject: xen/blkback: Fix the inhibition to map pages when discarding sector ranges. The 'operation' parameters are the ones provided to the bio layer while the req->operation are the ones passed in between the backend and frontend. We used the wrong 'operation' value to squash the call to map pages when processing the discard operation resulting in an hypercall that did nothing. Lets guard against going in the mapping function by checking for the proper operation type. CC: Li Dongyang Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/block/xen-blkback/blkback.c') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index c15c559e8662..53c81de6f886 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -707,8 +707,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, * the hypercall to unmap the grants - that is all done in * xen_blkbk_unmap. */ - if (operation != BLKIF_OP_DISCARD && - xen_blkbk_map(req, pending_req, seg)) + if (operation == REQ_DISCARD && xen_blkbk_map(req, pending_req, seg)) goto fail_flush; /* -- cgit From dda1852802a6cc6fdecb9021e491b2de680c76b9 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 14 Oct 2011 12:13:05 -0400 Subject: xen/blkback: Check for proper operation. The patch titled: "xen/blkback: Fix the inhibition to map pages when discarding sector ranges." had the right idea except that it used the wrong comparison operator. It had == instead of !=. This fixes the bug where all (except discard) operations would have been ignored. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/block/xen-blkback/blkback.c') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 53c81de6f886..a1ee2659d2bc 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -707,7 +707,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, * the hypercall to unmap the grants - that is all done in * xen_blkbk_unmap. */ - if (operation == REQ_DISCARD && xen_blkbk_map(req, pending_req, seg)) + if (operation != REQ_DISCARD && xen_blkbk_map(req, pending_req, seg)) goto fail_flush; /* -- cgit From 6927d92091df2848fc0e6a693a017d4b2df549c2 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 17 Oct 2011 14:27:48 -0400 Subject: xen/blkback: Fix two races in the handling of barrier requests. There are two windows of opportunity to cause a race when processing a barrier request. This patch fixes this. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/blkback.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/block/xen-blkback/blkback.c') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index a1ee2659d2bc..79efec24569b 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -456,15 +456,15 @@ static void xen_blk_drain_io(struct xen_blkif *blkif) { atomic_set(&blkif->drain, 1); do { + /* The initial value is one, and one refcnt taken at the + * start of the xen_blkif_schedule thread. */ + if (atomic_read(&blkif->refcnt) <= 2) + break; wait_for_completion_interruptible_timeout( &blkif->drain_complete, HZ); if (!atomic_read(&blkif->drain)) break; - /* The initial value is one, and one refcnt taken at the - * start of the xen_blkif_schedule thread. */ - if (atomic_read(&blkif->refcnt) <= 2) - break; } while (!kthread_should_stop()); atomic_set(&blkif->drain, 0); } @@ -502,11 +502,11 @@ static void __end_block_io_op(struct pending_req *pending_req, int error) make_response(pending_req->blkif, pending_req->id, pending_req->operation, pending_req->status); xen_blkif_put(pending_req->blkif); - free_req(pending_req); if (atomic_read(&pending_req->blkif->refcnt) <= 2) { if (atomic_read(&pending_req->blkif->drain)) complete(&pending_req->blkif->drain_complete); } + free_req(pending_req); } } -- cgit