Diffstat (limited to 'drivers/block/xen-blkback/blkback.c')
-rw-r--r--  drivers/block/xen-blkback/blkback.c | 444
1 file changed, 256 insertions(+), 188 deletions(-)
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index fe7cd58c43d0..a7c2b04ab943 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -62,8 +62,8 @@
* IO workloads.
*/
-static int xen_blkif_max_buffer_pages = 1024;
-module_param_named(max_buffer_pages, xen_blkif_max_buffer_pages, int, 0644);
+static int max_buffer_pages = 1024;
+module_param_named(max_buffer_pages, max_buffer_pages, int, 0644);
MODULE_PARM_DESC(max_buffer_pages,
"Maximum number of free pages to keep in each block backend buffer");
@@ -78,12 +78,24 @@ MODULE_PARM_DESC(max_buffer_pages,
* algorithm.
*/
-static int xen_blkif_max_pgrants = 1056;
-module_param_named(max_persistent_grants, xen_blkif_max_pgrants, int, 0644);
+static int max_pgrants = 1056;
+module_param_named(max_persistent_grants, max_pgrants, int, 0644);
MODULE_PARM_DESC(max_persistent_grants,
"Maximum number of grants to map persistently");
/*
+ * How long a persistent grant is allowed to remain allocated without being in
+ * use. The time is in seconds; 0 means indefinitely long.
+ */
+
+static unsigned int pgrant_timeout = 60;
+module_param_named(persistent_grant_unused_seconds, pgrant_timeout,
+ uint, 0644);
+MODULE_PARM_DESC(persistent_grant_unused_seconds,
+ "Time in seconds an unused persistent grant is allowed to "
+ "remain allocated. Default is 60, 0 means unlimited.");
+
+/*
 * Maximum number of rings/queues blkback supports; allow as many queues as there
* are CPUs if user has not specified a value.
*/
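
The renames above drop the xen_blkif_ prefix from the static variables while module_param_named() keeps the user-visible parameter names unchanged, so the sysfs ABI stays stable. A minimal sketch of that decoupling (illustrative only, not taken from the patch; the description text is made up):

#include <linux/module.h>
#include <linux/moduleparam.h>

/* module_param_named(name, variable, type, perm) publishes the variable
 * under "name", here /sys/module/<module>/parameters/max_buffer_pages,
 * so the C identifier can change without breaking the user interface. */
static int max_buffer_pages = 1024;
module_param_named(max_buffer_pages, max_buffer_pages, int, 0644);
MODULE_PARM_DESC(max_buffer_pages, "Maximum number of free pages to cache");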
@@ -98,7 +110,7 @@ MODULE_PARM_DESC(max_queues,
* backend, 4KB page granularity is used.
*/
unsigned int xen_blkif_max_ring_order = XENBUS_MAX_RING_GRANT_ORDER;
-module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, S_IRUGO);
+module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, 0444);
MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring");
/*
* The LRU mechanism to clean the lists of persistent grants needs to
@@ -120,70 +132,15 @@ module_param(log_stats, int, 0644);
#define BLKBACK_INVALID_HANDLE (~0)
-/* Number of free pages to remove on each call to gnttab_free_pages */
-#define NUM_BATCH_FREE_PAGES 10
-
-static inline int get_free_page(struct xen_blkif_ring *ring, struct page **page)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&ring->free_pages_lock, flags);
- if (list_empty(&ring->free_pages)) {
- BUG_ON(ring->free_pages_num != 0);
- spin_unlock_irqrestore(&ring->free_pages_lock, flags);
- return gnttab_alloc_pages(1, page);
- }
- BUG_ON(ring->free_pages_num == 0);
- page[0] = list_first_entry(&ring->free_pages, struct page, lru);
- list_del(&page[0]->lru);
- ring->free_pages_num--;
- spin_unlock_irqrestore(&ring->free_pages_lock, flags);
-
- return 0;
-}
-
-static inline void put_free_pages(struct xen_blkif_ring *ring, struct page **page,
- int num)
+static inline bool persistent_gnt_timeout(struct persistent_gnt *persistent_gnt)
{
- unsigned long flags;
- int i;
-
- spin_lock_irqsave(&ring->free_pages_lock, flags);
- for (i = 0; i < num; i++)
- list_add(&page[i]->lru, &ring->free_pages);
- ring->free_pages_num += num;
- spin_unlock_irqrestore(&ring->free_pages_lock, flags);
-}
-
-static inline void shrink_free_pagepool(struct xen_blkif_ring *ring, int num)
-{
- /* Remove requested pages in batches of NUM_BATCH_FREE_PAGES */
- struct page *page[NUM_BATCH_FREE_PAGES];
- unsigned int num_pages = 0;
- unsigned long flags;
-
- spin_lock_irqsave(&ring->free_pages_lock, flags);
- while (ring->free_pages_num > num) {
- BUG_ON(list_empty(&ring->free_pages));
- page[num_pages] = list_first_entry(&ring->free_pages,
- struct page, lru);
- list_del(&page[num_pages]->lru);
- ring->free_pages_num--;
- if (++num_pages == NUM_BATCH_FREE_PAGES) {
- spin_unlock_irqrestore(&ring->free_pages_lock, flags);
- gnttab_free_pages(num_pages, page);
- spin_lock_irqsave(&ring->free_pages_lock, flags);
- num_pages = 0;
- }
- }
- spin_unlock_irqrestore(&ring->free_pages_lock, flags);
- if (num_pages != 0)
- gnttab_free_pages(num_pages, page);
+ return pgrant_timeout && (jiffies - persistent_gnt->last_used >=
+ HZ * pgrant_timeout);
}
#define vaddr(page) ((unsigned long)pfn_to_kaddr(page_to_pfn(page)))
-static int do_block_io_op(struct xen_blkif_ring *ring);
+static int do_block_io_op(struct xen_blkif_ring *ring, unsigned int *eoi_flags);
static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
struct blkif_request *req,
struct pending_req *pending_req);
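
persistent_gnt_timeout() above deliberately computes jiffies - persistent_gnt->last_used with unsigned arithmetic, so the comparison stays correct across a jiffies wrap. A self-contained userspace sketch of the same test (32-bit counter and HZ value chosen only for the demo):

#include <assert.h>
#include <stdint.h>

#define HZ 250	/* illustrative tick rate, not the kernel's */

/* Same shape as persistent_gnt_timeout(): elapsed ticks >= HZ * timeout. */
static int timed_out(uint32_t now, uint32_t last_used, uint32_t timeout)
{
	return timeout && ((uint32_t)(now - last_used) >= HZ * timeout);
}

int main(void)
{
	/* last_used shortly before a 32-bit wrap, "now" just after it:
	 * unsigned subtraction still yields the small true elapsed time. */
	uint32_t last_used = UINT32_MAX - 10;
	uint32_t now = 100;

	assert((uint32_t)(now - last_used) == 111);
	assert(!timed_out(now, last_used, 60));		/* 111 ticks < 60 s */
	assert(timed_out(last_used + 60 * HZ, last_used, 60));
	assert(!timed_out(now, last_used, 0));		/* 0 disables the timeout */
	return 0;
}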
@@ -215,7 +172,7 @@ static int add_persistent_gnt(struct xen_blkif_ring *ring,
struct persistent_gnt *this;
struct xen_blkif *blkif = ring->blkif;
- if (ring->persistent_gnt_c >= xen_blkif_max_pgrants) {
+ if (ring->persistent_gnt_c >= max_pgrants) {
if (!blkif->vbd.overflow_max_grants)
blkif->vbd.overflow_max_grants = 1;
return -EBUSY;
@@ -236,8 +193,7 @@ static int add_persistent_gnt(struct xen_blkif_ring *ring,
}
}
- bitmap_zero(persistent_gnt->flags, PERSISTENT_GNT_FLAGS_SIZE);
- set_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags);
+ persistent_gnt->active = true;
/* Add new node and rebalance tree. */
rb_link_node(&(persistent_gnt->node), parent, new);
rb_insert_color(&(persistent_gnt->node), &ring->persistent_gnts);
@@ -261,11 +217,11 @@ static struct persistent_gnt *get_persistent_gnt(struct xen_blkif_ring *ring,
else if (gref > data->gnt)
node = node->rb_right;
else {
- if(test_bit(PERSISTENT_GNT_ACTIVE, data->flags)) {
+ if (data->active) {
pr_alert_ratelimited("requesting a grant already in use\n");
return NULL;
}
- set_bit(PERSISTENT_GNT_ACTIVE, data->flags);
+ data->active = true;
atomic_inc(&ring->persistent_gnt_in_use);
return data;
}
@@ -276,16 +232,16 @@ static struct persistent_gnt *get_persistent_gnt(struct xen_blkif_ring *ring,
static void put_persistent_gnt(struct xen_blkif_ring *ring,
struct persistent_gnt *persistent_gnt)
{
- if(!test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags))
+ if (!persistent_gnt->active)
pr_alert_ratelimited("freeing a grant already unused\n");
- set_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags);
- clear_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags);
+ persistent_gnt->last_used = jiffies;
+ persistent_gnt->active = false;
atomic_dec(&ring->persistent_gnt_in_use);
}
-static void free_persistent_gnts(struct xen_blkif_ring *ring, struct rb_root *root,
- unsigned int num)
+static void free_persistent_gnts(struct xen_blkif_ring *ring)
{
+ struct rb_root *root = &ring->persistent_gnts;
struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
struct persistent_gnt *persistent_gnt;
@@ -293,6 +249,9 @@ static void free_persistent_gnts(struct xen_blkif_ring *ring, struct rb_root *ro
int segs_to_unmap = 0;
struct gntab_unmap_queue_data unmap_data;
+ if (RB_EMPTY_ROOT(root))
+ return;
+
unmap_data.pages = pages;
unmap_data.unmap_ops = unmap;
unmap_data.kunmap_ops = NULL;
@@ -314,15 +273,18 @@ static void free_persistent_gnts(struct xen_blkif_ring *ring, struct rb_root *ro
unmap_data.count = segs_to_unmap;
BUG_ON(gnttab_unmap_refs_sync(&unmap_data));
- put_free_pages(ring, pages, segs_to_unmap);
+ gnttab_page_cache_put(&ring->free_pages, pages,
+ segs_to_unmap);
segs_to_unmap = 0;
}
rb_erase(&persistent_gnt->node, root);
kfree(persistent_gnt);
- num--;
+ ring->persistent_gnt_c--;
}
- BUG_ON(num != 0);
+
+ BUG_ON(!RB_EMPTY_ROOT(&ring->persistent_gnts));
+ BUG_ON(ring->persistent_gnt_c != 0);
}
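
free_persistent_gnts() (and xen_blkbk_unmap_purged_grants() below) issue the unmaps in batches of BLKIF_MAX_SEGMENTS_PER_REQUEST instead of one hypercall per grant. The accumulate-and-flush shape, reduced to a sketch with stand-in types (flush() plays the role of gnttab_unmap_refs_sync() plus gnttab_page_cache_put()):

#include <stddef.h>

#define BATCH 11	/* stand-in for BLKIF_MAX_SEGMENTS_PER_REQUEST */

static void process_all(const int *items, size_t n,
			void (*flush)(int *batch, size_t count))
{
	int batch[BATCH];
	size_t filled = 0;

	for (size_t i = 0; i < n; i++) {
		batch[filled++] = items[i];
		if (filled == BATCH) {	/* batch full: flush and reuse it */
			flush(batch, filled);
			filled = 0;
		}
	}
	if (filled)			/* final partial batch */
		flush(batch, filled);
}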
void xen_blkbk_unmap_purged_grants(struct work_struct *work)
@@ -354,7 +316,8 @@ void xen_blkbk_unmap_purged_grants(struct work_struct *work)
if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
unmap_data.count = segs_to_unmap;
BUG_ON(gnttab_unmap_refs_sync(&unmap_data));
- put_free_pages(ring, pages, segs_to_unmap);
+ gnttab_page_cache_put(&ring->free_pages, pages,
+ segs_to_unmap);
segs_to_unmap = 0;
}
kfree(persistent_gnt);
@@ -362,7 +325,7 @@ void xen_blkbk_unmap_purged_grants(struct work_struct *work)
if (segs_to_unmap > 0) {
unmap_data.count = segs_to_unmap;
BUG_ON(gnttab_unmap_refs_sync(&unmap_data));
- put_free_pages(ring, pages, segs_to_unmap);
+ gnttab_page_cache_put(&ring->free_pages, pages, segs_to_unmap);
}
}
@@ -371,26 +334,25 @@ static void purge_persistent_gnt(struct xen_blkif_ring *ring)
struct persistent_gnt *persistent_gnt;
struct rb_node *n;
unsigned int num_clean, total;
- bool scan_used = false, clean_used = false;
+ bool scan_used = false;
struct rb_root *root;
- if (ring->persistent_gnt_c < xen_blkif_max_pgrants ||
- (ring->persistent_gnt_c == xen_blkif_max_pgrants &&
- !ring->blkif->vbd.overflow_max_grants)) {
- goto out;
- }
-
if (work_busy(&ring->persistent_purge_work)) {
pr_alert_ratelimited("Scheduled work from previous purge is still busy, cannot purge list\n");
goto out;
}
- num_clean = (xen_blkif_max_pgrants / 100) * LRU_PERCENT_CLEAN;
- num_clean = ring->persistent_gnt_c - xen_blkif_max_pgrants + num_clean;
- num_clean = min(ring->persistent_gnt_c, num_clean);
- if ((num_clean == 0) ||
- (num_clean > (ring->persistent_gnt_c - atomic_read(&ring->persistent_gnt_in_use))))
- goto out;
+ if (ring->persistent_gnt_c < max_pgrants ||
+ (ring->persistent_gnt_c == max_pgrants &&
+ !ring->blkif->vbd.overflow_max_grants)) {
+ num_clean = 0;
+ } else {
+ num_clean = (max_pgrants / 100) * LRU_PERCENT_CLEAN;
+ num_clean = ring->persistent_gnt_c - max_pgrants + num_clean;
+ num_clean = min(ring->persistent_gnt_c, num_clean);
+ pr_debug("Going to purge at least %u persistent grants\n",
+ num_clean);
+ }
/*
* At this point, we can assure that there will be no calls
@@ -401,9 +363,7 @@ static void purge_persistent_gnt(struct xen_blkif_ring *ring)
* number of grants.
*/
- total = num_clean;
-
- pr_debug("Going to purge %u persistent grants\n", num_clean);
+ total = 0;
BUG_ON(!list_empty(&ring->persistent_purge_list));
root = &ring->persistent_gnts;
@@ -412,46 +372,37 @@ purge_list:
BUG_ON(persistent_gnt->handle ==
BLKBACK_INVALID_HANDLE);
- if (clean_used) {
- clear_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags);
+ if (persistent_gnt->active)
continue;
- }
-
- if (test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags))
+ if (!scan_used && !persistent_gnt_timeout(persistent_gnt))
continue;
- if (!scan_used &&
- (test_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags)))
+ if (scan_used && total >= num_clean)
continue;
rb_erase(&persistent_gnt->node, root);
list_add(&persistent_gnt->remove_node,
&ring->persistent_purge_list);
- if (--num_clean == 0)
- goto finished;
+ total++;
}
/*
- * If we get here it means we also need to start cleaning
+ * Check whether we also need to start cleaning
* grants that were used since last purge in order to cope
* with the requested num
*/
- if (!scan_used && !clean_used) {
- pr_debug("Still missing %u purged frames\n", num_clean);
+ if (!scan_used && total < num_clean) {
+ pr_debug("Still missing %u purged frames\n", num_clean - total);
scan_used = true;
goto purge_list;
}
-finished:
- if (!clean_used) {
- pr_debug("Finished scanning for grants to clean, removing used flag\n");
- clean_used = true;
- goto purge_list;
- }
- ring->persistent_gnt_c -= (total - num_clean);
- ring->blkif->vbd.overflow_max_grants = 0;
+ if (total) {
+ ring->persistent_gnt_c -= total;
+ ring->blkif->vbd.overflow_max_grants = 0;
- /* We can defer this work */
- schedule_work(&ring->persistent_purge_work);
- pr_debug("Purged %u/%u\n", (total - num_clean), total);
+ /* We can defer this work */
+ schedule_work(&ring->persistent_purge_work);
+ pr_debug("Purged %u/%u\n", num_clean, total);
+ }
out:
return;
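
The rewritten scan makes at most two passes over the tree. The first pass runs unconditionally (num_clean may be 0) and evicts only inactive grants whose pgrant_timeout has expired; the second pass (scan_used) evicts any inactive grant until the num_clean quota is met. A reduced sketch, with an array standing in for the rbtree and a purged flag for the move to persistent_purge_list:

#include <stdbool.h>
#include <stddef.h>

struct gnt { bool active; bool timed_out; bool purged; };

static unsigned int select_for_purge(struct gnt *g, size_t n,
				     unsigned int num_clean)
{
	unsigned int total = 0;
	bool scan_used = false;

again:
	for (size_t i = 0; i < n; i++) {
		if (g[i].purged || g[i].active)
			continue;		/* in use: never purge */
		if (!scan_used && !g[i].timed_out)
			continue;		/* pass 1: timed-out grants only */
		if (scan_used && total >= num_clean)
			continue;		/* pass 2: stop at the quota */
		g[i].purged = true;
		total++;
	}
	if (!scan_used && total < num_clean) {
		scan_used = true;		/* pass 2: any unused grant */
		goto again;
	}
	return total;
}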
@@ -496,7 +447,7 @@ static void free_req(struct xen_blkif_ring *ring, struct pending_req *req)
* Routines for managing virtual block devices (vbds).
*/
static int xen_vbd_translate(struct phys_req *req, struct xen_blkif *blkif,
- int operation)
+ enum req_op operation)
{
struct xen_vbd *vbd = &blkif->vbd;
int rc = -EACCES;
@@ -514,7 +465,7 @@ static int xen_vbd_translate(struct phys_req *req, struct xen_blkif *blkif,
}
req->dev = vbd->pdevice;
- req->bdev = vbd->bdev;
+ req->bdev = file_bdev(vbd->bdev_file);
rc = 0;
out:
@@ -592,9 +543,8 @@ static void print_stats(struct xen_blkif_ring *ring)
current->comm, ring->st_oo_req,
ring->st_rd_req, ring->st_wr_req,
ring->st_f_req, ring->st_ds_req,
- ring->persistent_gnt_c,
- xen_blkif_max_pgrants);
- ring->st_print = jiffies + msecs_to_jiffies(10 * 1000);
+ ring->persistent_gnt_c, max_pgrants);
+ ring->st_print = jiffies + secs_to_jiffies(10);
ring->st_rd_req = 0;
ring->st_wr_req = 0;
ring->st_oo_req = 0;
@@ -608,6 +558,8 @@ int xen_blkif_schedule(void *arg)
struct xen_vbd *vbd = &blkif->vbd;
unsigned long timeout;
int ret;
+ bool do_eoi;
+ unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS;
set_freezable();
while (!kthread_should_stop()) {
@@ -632,16 +584,23 @@ int xen_blkif_schedule(void *arg)
if (timeout == 0)
goto purge_gnt_list;
+ do_eoi = ring->waiting_reqs;
+
ring->waiting_reqs = 0;
smp_mb(); /* clear flag *before* checking for work */
- ret = do_block_io_op(ring);
+ ret = do_block_io_op(ring, &eoi_flags);
if (ret > 0)
ring->waiting_reqs = 1;
if (ret == -EACCES)
wait_event_interruptible(ring->shutdown_wq,
kthread_should_stop());
+ if (do_eoi && !ring->waiting_reqs) {
+ xen_irq_lateeoi(ring->irq, eoi_flags);
+ eoi_flags |= XEN_EOI_FLAG_SPURIOUS;
+ }
+
purge_gnt_list:
if (blkif->vbd.feature_gnt_persistent &&
time_after(jiffies, ring->next_lru)) {
@@ -649,8 +608,12 @@ purge_gnt_list:
ring->next_lru = jiffies + msecs_to_jiffies(LRU_INTERVAL);
}
- /* Shrink if we have more than xen_blkif_max_buffer_pages */
- shrink_free_pagepool(ring, xen_blkif_max_buffer_pages);
+ /* Shrink the free pages pool if it is too large. */
+ if (time_before(jiffies, blkif->buffer_squeeze_end))
+ gnttab_page_cache_shrink(&ring->free_pages, 0);
+ else
+ gnttab_page_cache_shrink(&ring->free_pages,
+ max_buffer_pages);
if (log_stats && time_after(jiffies, ring->st_print))
print_stats(ring);
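
The do_eoi/eoi_flags pair implements the lateeoi protocol: the event channel is acknowledged only after the ring has been drained, and XEN_EOI_FLAG_SPURIOUS is left set when a wakeup produced no request, letting the event core throttle a misbehaving frontend. A self-contained sketch of the handshake (struct ring, process_ring() and the printf() stand in for the driver's structures and xen_irq_lateeoi(); the real loop also has a memory barrier after clearing waiting_reqs):

#include <stdbool.h>
#include <stdio.h>

#define EOI_FLAG_SPURIOUS 0x1	/* stand-in for XEN_EOI_FLAG_SPURIOUS */

struct ring { int pending; bool waiting_reqs; };

/* Stand-in for __do_block_io_op(): clears the spurious flag as soon as
 * one real request is consumed. */
static int process_ring(struct ring *r, unsigned int *eoi_flags)
{
	while (r->pending > 0) {
		*eoi_flags &= ~EOI_FLAG_SPURIOUS;
		r->pending--;
	}
	return 0;	/* no more work queued (more_to_do in the driver) */
}

static void service_once(struct ring *r)
{
	unsigned int eoi_flags = EOI_FLAG_SPURIOUS;
	bool do_eoi = r->waiting_reqs;	/* woken by the event channel? */

	r->waiting_reqs = false;
	if (process_ring(r, &eoi_flags) > 0)
		r->waiting_reqs = true;

	if (do_eoi && !r->waiting_reqs)
		printf("EOI, flags=%#x\n", eoi_flags);
}

int main(void)
{
	struct ring r = { .pending = 2, .waiting_reqs = true };

	service_once(&r);	/* flags=0: the wakeup was genuine */
	r.waiting_reqs = true;
	service_once(&r);	/* flags=0x1: empty ring, flagged spurious */
	return 0;
}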
@@ -673,15 +636,10 @@ purge_gnt_list:
void xen_blkbk_free_caches(struct xen_blkif_ring *ring)
{
/* Free all persistent grant pages */
- if (!RB_EMPTY_ROOT(&ring->persistent_gnts))
- free_persistent_gnts(ring, &ring->persistent_gnts,
- ring->persistent_gnt_c);
-
- BUG_ON(!RB_EMPTY_ROOT(&ring->persistent_gnts));
- ring->persistent_gnt_c = 0;
+ free_persistent_gnts(ring);
/* Since we are shutting down remove all pages from the buffer */
- shrink_free_pagepool(ring, 0 /* All */);
+ gnttab_page_cache_shrink(&ring->free_pages, 0 /* All */);
}
static unsigned int xen_blkbk_unmap_prepare(
@@ -705,9 +663,9 @@ static unsigned int xen_blkbk_unmap_prepare(
GNTMAP_host_map, pages[i]->handle);
pages[i]->handle = BLKBACK_INVALID_HANDLE;
invcount++;
- }
+ }
- return invcount;
+ return invcount;
}
static void xen_blkbk_unmap_and_respond_callback(int result, struct gntab_unmap_queue_data *data)
@@ -720,7 +678,7 @@ static void xen_blkbk_unmap_and_respond_callback(int result, struct gntab_unmap_
but is this the best way to deal with this? */
BUG_ON(result);
- put_free_pages(ring, data->pages, data->count);
+ gnttab_page_cache_put(&ring->free_pages, data->pages, data->count);
make_response(ring, pending_req->id,
pending_req->operation, pending_req->status);
free_req(ring, pending_req);
@@ -787,7 +745,8 @@ static void xen_blkbk_unmap(struct xen_blkif_ring *ring,
if (invcount) {
ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, invcount);
BUG_ON(ret);
- put_free_pages(ring, unmap_pages, invcount);
+ gnttab_page_cache_put(&ring->free_pages, unmap_pages,
+ invcount);
}
pages += batch;
num -= batch;
@@ -834,8 +793,14 @@ again:
pages[i]->page = persistent_gnt->page;
pages[i]->persistent_gnt = persistent_gnt;
} else {
- if (get_free_page(ring, &pages[i]->page))
- goto out_of_memory;
+ if (gnttab_page_cache_get(&ring->free_pages,
+ &pages[i]->page)) {
+ gnttab_page_cache_put(&ring->free_pages,
+ pages_to_gnt,
+ segs_to_map);
+ ret = -ENOMEM;
+ goto out;
+ }
addr = vaddr(pages[i]->page);
pages_to_gnt[segs_to_map] = pages[i]->page;
pages[i]->persistent_gnt = NULL;
@@ -851,10 +816,8 @@ again:
break;
}
- if (segs_to_map) {
+ if (segs_to_map)
ret = gnttab_map_refs(map, NULL, pages_to_gnt, segs_to_map);
- BUG_ON(ret);
- }
/*
* Now swizzle the MFN in our domain with the MFN from the other domain
@@ -867,9 +830,10 @@ again:
BUG_ON(new_map_idx >= segs_to_map);
if (unlikely(map[new_map_idx].status != 0)) {
pr_debug("invalid buffer -- could not remap it\n");
- put_free_pages(ring, &pages[seg_idx]->page, 1);
+ gnttab_page_cache_put(&ring->free_pages,
+ &pages[seg_idx]->page, 1);
pages[seg_idx]->handle = BLKBACK_INVALID_HANDLE;
- ret |= 1;
+ ret |= !ret;
goto next;
}
pages[seg_idx]->handle = map[new_map_idx].handle;
@@ -877,7 +841,7 @@ again:
continue;
}
if (use_persistent_gnts &&
- ring->persistent_gnt_c < xen_blkif_max_pgrants) {
+ ring->persistent_gnt_c < max_pgrants) {
/*
* We are using persistent grants, the grant is
* not mapped but we might have room for it.
@@ -904,7 +868,7 @@ again:
pages[seg_idx]->persistent_gnt = persistent_gnt;
pr_debug("grant %u added to the tree of persistent grants, using %u/%u\n",
persistent_gnt->gnt, ring->persistent_gnt_c,
- xen_blkif_max_pgrants);
+ max_pgrants);
goto next;
}
if (use_persistent_gnts && !blkif->vbd.overflow_max_grants) {
@@ -921,15 +885,18 @@ next:
}
segs_to_map = 0;
last_map = map_until;
- if (map_until != num)
+ if (!ret && map_until != num)
goto again;
- return ret;
+out:
+ for (i = last_map; i < num; i++) {
+ /* Don't zap current batch's valid persistent grants. */
+ if (i >= map_until)
+ pages[i]->persistent_gnt = NULL;
+ pages[i]->handle = BLKBACK_INVALID_HANDLE;
+ }
-out_of_memory:
- pr_alert("%s: out of memory\n", __func__);
- put_free_pages(ring, pages_to_gnt, segs_to_map);
- return -ENOMEM;
+ return ret;
}
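
With the out_of_memory label gone, every failure in xen_blkbk_map() funnels through out:, which must leave the pages array in a state a later xen_blkbk_unmap() can walk safely. A sketch of that cleanup invariant (stand-in types; last_map..map_until is the batch being processed when the failure hit):

#include <stddef.h>

#define INVALID_HANDLE (~0u)

struct page_ref { unsigned int handle; void *persistent_gnt; };

static void zap_unmapped(struct page_ref *pages, size_t num,
			 size_t last_map, size_t map_until)
{
	for (size_t i = last_map; i < num; i++) {
		/* Pages inside the current batch may already hold a valid
		 * persistent grant, so only the grant handle is zapped;
		 * pages past map_until never got that far. */
		if (i >= map_until)
			pages[i].persistent_gnt = NULL;
		pages[i].handle = INVALID_HANDLE;
	}
}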
static int xen_blkbk_map_seg(struct pending_req *pending_req)
@@ -964,7 +931,7 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req,
if (rc)
goto unmap;
- for (n = 0, i = 0; n < nseg; n++) {
+ for (n = 0; n < nseg; n++) {
uint8_t first_sect, last_sect;
if ((n % SEGS_PER_INDIRECT_FRAME) == 0) {
@@ -1002,8 +969,7 @@ static int dispatch_discard_io(struct xen_blkif_ring *ring,
int err = 0;
int status = BLKIF_RSP_OKAY;
struct xen_blkif *blkif = ring->blkif;
- struct block_device *bdev = blkif->vbd.bdev;
- unsigned long secure;
+ struct block_device *bdev = file_bdev(blkif->vbd.bdev_file);
struct phys_req preq;
xen_blkif_get(blkif);
@@ -1020,13 +986,15 @@ static int dispatch_discard_io(struct xen_blkif_ring *ring,
}
ring->st_ds_req++;
- secure = (blkif->vbd.discard_secure &&
- (req->u.discard.flag & BLKIF_DISCARD_SECURE)) ?
- BLKDEV_DISCARD_SECURE : 0;
+ if (blkif->vbd.discard_secure &&
+ (req->u.discard.flag & BLKIF_DISCARD_SECURE))
+ err = blkdev_issue_secure_erase(bdev,
+ req->u.discard.sector_number,
+ req->u.discard.nr_sectors, GFP_KERNEL);
+ else
+ err = blkdev_issue_discard(bdev, req->u.discard.sector_number,
+ req->u.discard.nr_sectors, GFP_KERNEL);
- err = blkdev_issue_discard(bdev, req->u.discard.sector_number,
- req->u.discard.nr_sectors,
- GFP_KERNEL, secure);
fail_response:
if (err == -EOPNOTSUPP) {
pr_debug("discard op failed, not supported\n");
@@ -1104,7 +1072,111 @@ static void end_block_io_op(struct bio *bio)
bio_put(bio);
}
+static void blkif_get_x86_32_req(struct blkif_request *dst,
+ const struct blkif_x86_32_request *src)
+{
+ unsigned int i, n;
+
+ dst->operation = READ_ONCE(src->operation);
+
+ switch (dst->operation) {
+ case BLKIF_OP_READ:
+ case BLKIF_OP_WRITE:
+ case BLKIF_OP_WRITE_BARRIER:
+ case BLKIF_OP_FLUSH_DISKCACHE:
+ dst->u.rw.nr_segments = READ_ONCE(src->u.rw.nr_segments);
+ dst->u.rw.handle = src->u.rw.handle;
+ dst->u.rw.id = src->u.rw.id;
+ dst->u.rw.sector_number = src->u.rw.sector_number;
+ n = min_t(unsigned int, BLKIF_MAX_SEGMENTS_PER_REQUEST,
+ dst->u.rw.nr_segments);
+ for (i = 0; i < n; i++)
+ dst->u.rw.seg[i] = src->u.rw.seg[i];
+ break;
+
+ case BLKIF_OP_DISCARD:
+ dst->u.discard.flag = src->u.discard.flag;
+ dst->u.discard.id = src->u.discard.id;
+ dst->u.discard.sector_number = src->u.discard.sector_number;
+ dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
+ break;
+
+ case BLKIF_OP_INDIRECT:
+ dst->u.indirect.indirect_op = src->u.indirect.indirect_op;
+ dst->u.indirect.nr_segments =
+ READ_ONCE(src->u.indirect.nr_segments);
+ dst->u.indirect.handle = src->u.indirect.handle;
+ dst->u.indirect.id = src->u.indirect.id;
+ dst->u.indirect.sector_number = src->u.indirect.sector_number;
+ n = min(MAX_INDIRECT_PAGES,
+ INDIRECT_PAGES(dst->u.indirect.nr_segments));
+ for (i = 0; i < n; i++)
+ dst->u.indirect.indirect_grefs[i] =
+ src->u.indirect.indirect_grefs[i];
+ break;
+
+ default:
+ /*
+ * Don't know how to translate this op. Only get the
+ * ID so failure can be reported to the frontend.
+ */
+ dst->u.other.id = src->u.other.id;
+ break;
+ }
+}
+
+static void blkif_get_x86_64_req(struct blkif_request *dst,
+ const struct blkif_x86_64_request *src)
+{
+ unsigned int i, n;
+
+ dst->operation = READ_ONCE(src->operation);
+
+ switch (dst->operation) {
+ case BLKIF_OP_READ:
+ case BLKIF_OP_WRITE:
+ case BLKIF_OP_WRITE_BARRIER:
+ case BLKIF_OP_FLUSH_DISKCACHE:
+ dst->u.rw.nr_segments = READ_ONCE(src->u.rw.nr_segments);
+ dst->u.rw.handle = src->u.rw.handle;
+ dst->u.rw.id = src->u.rw.id;
+ dst->u.rw.sector_number = src->u.rw.sector_number;
+ n = min_t(unsigned int, BLKIF_MAX_SEGMENTS_PER_REQUEST,
+ dst->u.rw.nr_segments);
+ for (i = 0; i < n; i++)
+ dst->u.rw.seg[i] = src->u.rw.seg[i];
+ break;
+
+ case BLKIF_OP_DISCARD:
+ dst->u.discard.flag = src->u.discard.flag;
+ dst->u.discard.id = src->u.discard.id;
+ dst->u.discard.sector_number = src->u.discard.sector_number;
+ dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
+ break;
+ case BLKIF_OP_INDIRECT:
+ dst->u.indirect.indirect_op = src->u.indirect.indirect_op;
+ dst->u.indirect.nr_segments =
+ READ_ONCE(src->u.indirect.nr_segments);
+ dst->u.indirect.handle = src->u.indirect.handle;
+ dst->u.indirect.id = src->u.indirect.id;
+ dst->u.indirect.sector_number = src->u.indirect.sector_number;
+ n = min(MAX_INDIRECT_PAGES,
+ INDIRECT_PAGES(dst->u.indirect.nr_segments));
+ for (i = 0; i < n; i++)
+ dst->u.indirect.indirect_grefs[i] =
+ src->u.indirect.indirect_grefs[i];
+ break;
+
+ default:
+ /*
+ * Don't know how to translate this op. Only get the
+ * ID so failure can be reported to the frontend.
+ */
+ dst->u.other.id = src->u.other.id;
+ break;
+ }
+}
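
Both copy helpers read guest-controlled fields with READ_ONCE() because src sits in a page shared with the frontend: a bound such as nr_segments must be fetched exactly once and every later check applied to the local copy, or a racing frontend could enlarge it between validation and use. A userspace sketch of the single-fetch pattern (the READ_ONCE macro is approximated with volatile; types are stand-ins):

#include <stdint.h>

#define READ_ONCE(x) (*(const volatile __typeof__(x) *)&(x))

#define MAX_SEGS 11	/* stand-in for BLKIF_MAX_SEGMENTS_PER_REQUEST */

struct shared_req { uint8_t nr_segments; uint64_t seg[MAX_SEGS]; };
struct local_req  { uint8_t nr_segments; uint64_t seg[MAX_SEGS]; };

static void get_req(struct local_req *dst, const struct shared_req *src)
{
	unsigned int i, n;

	/* One fetch; the copy loop is clamped against the local value,
	 * so a frontend rewriting nr_segments cannot widen it. */
	dst->nr_segments = READ_ONCE(src->nr_segments);
	n = dst->nr_segments < MAX_SEGS ? dst->nr_segments : MAX_SEGS;
	for (i = 0; i < n; i++)
		dst->seg[i] = src->seg[i];
}

int main(void)
{
	struct shared_req s = { .nr_segments = 200 };	/* hostile value */
	struct local_req d;

	get_req(&d, &s);	/* copies at most MAX_SEGS entries */
	return d.nr_segments == 200 ? 0 : 1;
}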
/*
 * Function to copy a 'struct blkif_request' from the ring buffer
@@ -1112,7 +1184,7 @@ static void end_block_io_op(struct bio *bio)
* and transmute it to the block API to hand it over to the proper block disk.
*/
static int
-__do_block_io_op(struct xen_blkif_ring *ring)
+__do_block_io_op(struct xen_blkif_ring *ring, unsigned int *eoi_flags)
{
union blkif_back_rings *blk_rings = &ring->blk_rings;
struct blkif_request req;
@@ -1135,6 +1207,9 @@ __do_block_io_op(struct xen_blkif_ring *ring)
if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
break;
+ /* We've seen a request, so clear spurious eoi flag. */
+ *eoi_flags &= ~XEN_EOI_FLAG_SPURIOUS;
+
if (kthread_should_stop()) {
more_to_do = 1;
break;
@@ -1193,13 +1268,13 @@ done:
}
static int
-do_block_io_op(struct xen_blkif_ring *ring)
+do_block_io_op(struct xen_blkif_ring *ring, unsigned int *eoi_flags)
{
union blkif_back_rings *blk_rings = &ring->blk_rings;
int more_to_do;
do {
- more_to_do = __do_block_io_op(ring);
+ more_to_do = __do_block_io_op(ring, eoi_flags);
if (more_to_do)
break;
@@ -1222,8 +1297,8 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
struct bio *bio = NULL;
struct bio **biolist = pending_req->biolist;
int i, nbio = 0;
- int operation;
- int operation_flags = 0;
+ enum req_op operation;
+ blk_opf_t operation_flags = 0;
struct blk_plug plug;
bool drain = false;
struct grant_page **pages = pending_req->segments;
@@ -1251,6 +1326,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
break;
case BLKIF_OP_WRITE_BARRIER:
drain = true;
+ fallthrough;
case BLKIF_OP_FLUSH_DISKCACHE:
ring->st_f_req++;
operation = REQ_OP_WRITE;
@@ -1355,18 +1431,13 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
pages[i]->page,
seg[i].nsec << 9,
seg[i].offset) == 0)) {
-
- int nr_iovecs = min_t(int, (nseg-i), BIO_MAX_PAGES);
- bio = bio_alloc(GFP_KERNEL, nr_iovecs);
- if (unlikely(bio == NULL))
- goto fail_put_bio;
-
+ bio = bio_alloc(preq.bdev, bio_max_segs(nseg - i),
+ operation | operation_flags,
+ GFP_KERNEL);
biolist[nbio++] = bio;
- bio->bi_bdev = preq.bdev;
bio->bi_private = pending_req;
bio->bi_end_io = end_block_io_op;
bio->bi_iter.bi_sector = preq.sector_number;
- bio_set_op_attrs(bio, operation, operation_flags);
}
preq.sector_number += seg[i].nsec;
@@ -1376,15 +1447,11 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
if (!bio) {
BUG_ON(operation_flags != REQ_PREFLUSH);
- bio = bio_alloc(GFP_KERNEL, 0);
- if (unlikely(bio == NULL))
- goto fail_put_bio;
-
+ bio = bio_alloc(preq.bdev, 0, operation | operation_flags,
+ GFP_KERNEL);
biolist[nbio++] = bio;
- bio->bi_bdev = preq.bdev;
bio->bi_private = pending_req;
bio->bi_end_io = end_block_io_op;
- bio_set_op_attrs(bio, operation, operation_flags);
}
atomic_set(&pending_req->pendcnt, nbio);
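
bio_alloc() is used here with its modern signature (bdev, nr_vecs, opf, gfp): the target device and the operation are fixed at allocation time, and with GFP_KERNEL and a vector count capped by bio_max_segs() the call cannot return NULL, which is why the fail_put_bio unwind path disappears below. A sketch of the pattern (make_write_bio() is a stand-in, not driver code; API as of roughly v5.18):

#include <linux/bio.h>
#include <linux/blkdev.h>

static struct bio *make_write_bio(struct block_device *bdev,
				  sector_t sector, unsigned short nr_vecs)
{
	/* bio_max_segs() clamps to BIO_MAX_VECS, so this allocation is
	 * guaranteed to succeed with GFP_KERNEL. */
	struct bio *bio = bio_alloc(bdev, bio_max_segs(nr_vecs),
				    REQ_OP_WRITE | REQ_PREFLUSH, GFP_KERNEL);

	bio->bi_iter.bi_sector = sector;
	return bio;
}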
@@ -1412,14 +1479,6 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
free_req(ring, pending_req);
msleep(1); /* back off a bit */
return -EIO;
-
- fail_put_bio:
- for (i = 0; i < nbio; i++)
- bio_put(biolist[i]);
- atomic_set(&pending_req->pendcnt, 1);
- __end_block_io_op(pending_req, BLK_STS_RESOURCE);
- msleep(1); /* back off a bit */
- return -EIO;
}
@@ -1496,5 +1555,14 @@ static int __init xen_blkif_init(void)
module_init(xen_blkif_init);
+static void __exit xen_blkif_fini(void)
+{
+ xen_blkif_xenbus_fini();
+ xen_blkif_interface_fini();
+}
+
+module_exit(xen_blkif_fini);
+
+MODULE_DESCRIPTION("Virtual block device back-end driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS("xen-backend:vbd");