diff options
Diffstat (limited to 'drivers/block/xen-blkback/blkback.c')
| -rw-r--r-- | drivers/block/xen-blkback/blkback.c | 444 |
1 files changed, 256 insertions, 188 deletions
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index fe7cd58c43d0..a7c2b04ab943 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -62,8 +62,8 @@ * IO workloads. */ -static int xen_blkif_max_buffer_pages = 1024; -module_param_named(max_buffer_pages, xen_blkif_max_buffer_pages, int, 0644); +static int max_buffer_pages = 1024; +module_param_named(max_buffer_pages, max_buffer_pages, int, 0644); MODULE_PARM_DESC(max_buffer_pages, "Maximum number of free pages to keep in each block backend buffer"); @@ -78,12 +78,24 @@ MODULE_PARM_DESC(max_buffer_pages, * algorithm. */ -static int xen_blkif_max_pgrants = 1056; -module_param_named(max_persistent_grants, xen_blkif_max_pgrants, int, 0644); +static int max_pgrants = 1056; +module_param_named(max_persistent_grants, max_pgrants, int, 0644); MODULE_PARM_DESC(max_persistent_grants, "Maximum number of grants to map persistently"); /* + * How long a persistent grant is allowed to remain allocated without being in + * use. The time is in seconds, 0 means indefinitely long. + */ + +static unsigned int pgrant_timeout = 60; +module_param_named(persistent_grant_unused_seconds, pgrant_timeout, + uint, 0644); +MODULE_PARM_DESC(persistent_grant_unused_seconds, + "Time in seconds an unused persistent grant is allowed to " + "remain allocated. Default is 60, 0 means unlimited."); + +/* * Maximum number of rings/queues blkback supports, allow as many queues as there * are CPUs if user has not specified a value. */ @@ -98,7 +110,7 @@ MODULE_PARM_DESC(max_queues, * backend, 4KB page granularity is used. */ unsigned int xen_blkif_max_ring_order = XENBUS_MAX_RING_GRANT_ORDER; -module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, S_IRUGO); +module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, 0444); MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring"); /* * The LRU mechanism to clean the lists of persistent grants needs to @@ -120,70 +132,15 @@ module_param(log_stats, int, 0644); #define BLKBACK_INVALID_HANDLE (~0) -/* Number of free pages to remove on each call to gnttab_free_pages */ -#define NUM_BATCH_FREE_PAGES 10 - -static inline int get_free_page(struct xen_blkif_ring *ring, struct page **page) -{ - unsigned long flags; - - spin_lock_irqsave(&ring->free_pages_lock, flags); - if (list_empty(&ring->free_pages)) { - BUG_ON(ring->free_pages_num != 0); - spin_unlock_irqrestore(&ring->free_pages_lock, flags); - return gnttab_alloc_pages(1, page); - } - BUG_ON(ring->free_pages_num == 0); - page[0] = list_first_entry(&ring->free_pages, struct page, lru); - list_del(&page[0]->lru); - ring->free_pages_num--; - spin_unlock_irqrestore(&ring->free_pages_lock, flags); - - return 0; -} - -static inline void put_free_pages(struct xen_blkif_ring *ring, struct page **page, - int num) +static inline bool persistent_gnt_timeout(struct persistent_gnt *persistent_gnt) { - unsigned long flags; - int i; - - spin_lock_irqsave(&ring->free_pages_lock, flags); - for (i = 0; i < num; i++) - list_add(&page[i]->lru, &ring->free_pages); - ring->free_pages_num += num; - spin_unlock_irqrestore(&ring->free_pages_lock, flags); -} - -static inline void shrink_free_pagepool(struct xen_blkif_ring *ring, int num) -{ - /* Remove requested pages in batches of NUM_BATCH_FREE_PAGES */ - struct page *page[NUM_BATCH_FREE_PAGES]; - unsigned int num_pages = 0; - unsigned long flags; - - spin_lock_irqsave(&ring->free_pages_lock, flags); - while (ring->free_pages_num > num) { - BUG_ON(list_empty(&ring->free_pages)); - page[num_pages] = list_first_entry(&ring->free_pages, - struct page, lru); - list_del(&page[num_pages]->lru); - ring->free_pages_num--; - if (++num_pages == NUM_BATCH_FREE_PAGES) { - spin_unlock_irqrestore(&ring->free_pages_lock, flags); - gnttab_free_pages(num_pages, page); - spin_lock_irqsave(&ring->free_pages_lock, flags); - num_pages = 0; - } - } - spin_unlock_irqrestore(&ring->free_pages_lock, flags); - if (num_pages != 0) - gnttab_free_pages(num_pages, page); + return pgrant_timeout && (jiffies - persistent_gnt->last_used >= + HZ * pgrant_timeout); } #define vaddr(page) ((unsigned long)pfn_to_kaddr(page_to_pfn(page))) -static int do_block_io_op(struct xen_blkif_ring *ring); +static int do_block_io_op(struct xen_blkif_ring *ring, unsigned int *eoi_flags); static int dispatch_rw_block_io(struct xen_blkif_ring *ring, struct blkif_request *req, struct pending_req *pending_req); @@ -215,7 +172,7 @@ static int add_persistent_gnt(struct xen_blkif_ring *ring, struct persistent_gnt *this; struct xen_blkif *blkif = ring->blkif; - if (ring->persistent_gnt_c >= xen_blkif_max_pgrants) { + if (ring->persistent_gnt_c >= max_pgrants) { if (!blkif->vbd.overflow_max_grants) blkif->vbd.overflow_max_grants = 1; return -EBUSY; @@ -236,8 +193,7 @@ static int add_persistent_gnt(struct xen_blkif_ring *ring, } } - bitmap_zero(persistent_gnt->flags, PERSISTENT_GNT_FLAGS_SIZE); - set_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags); + persistent_gnt->active = true; /* Add new node and rebalance tree. */ rb_link_node(&(persistent_gnt->node), parent, new); rb_insert_color(&(persistent_gnt->node), &ring->persistent_gnts); @@ -261,11 +217,11 @@ static struct persistent_gnt *get_persistent_gnt(struct xen_blkif_ring *ring, else if (gref > data->gnt) node = node->rb_right; else { - if(test_bit(PERSISTENT_GNT_ACTIVE, data->flags)) { + if (data->active) { pr_alert_ratelimited("requesting a grant already in use\n"); return NULL; } - set_bit(PERSISTENT_GNT_ACTIVE, data->flags); + data->active = true; atomic_inc(&ring->persistent_gnt_in_use); return data; } @@ -276,16 +232,16 @@ static struct persistent_gnt *get_persistent_gnt(struct xen_blkif_ring *ring, static void put_persistent_gnt(struct xen_blkif_ring *ring, struct persistent_gnt *persistent_gnt) { - if(!test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags)) + if (!persistent_gnt->active) pr_alert_ratelimited("freeing a grant already unused\n"); - set_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags); - clear_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags); + persistent_gnt->last_used = jiffies; + persistent_gnt->active = false; atomic_dec(&ring->persistent_gnt_in_use); } -static void free_persistent_gnts(struct xen_blkif_ring *ring, struct rb_root *root, - unsigned int num) +static void free_persistent_gnts(struct xen_blkif_ring *ring) { + struct rb_root *root = &ring->persistent_gnts; struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; struct persistent_gnt *persistent_gnt; @@ -293,6 +249,9 @@ static void free_persistent_gnts(struct xen_blkif_ring *ring, struct rb_root *ro int segs_to_unmap = 0; struct gntab_unmap_queue_data unmap_data; + if (RB_EMPTY_ROOT(root)) + return; + unmap_data.pages = pages; unmap_data.unmap_ops = unmap; unmap_data.kunmap_ops = NULL; @@ -314,15 +273,18 @@ static void free_persistent_gnts(struct xen_blkif_ring *ring, struct rb_root *ro unmap_data.count = segs_to_unmap; BUG_ON(gnttab_unmap_refs_sync(&unmap_data)); - put_free_pages(ring, pages, segs_to_unmap); + gnttab_page_cache_put(&ring->free_pages, pages, + segs_to_unmap); segs_to_unmap = 0; } rb_erase(&persistent_gnt->node, root); kfree(persistent_gnt); - num--; + ring->persistent_gnt_c--; } - BUG_ON(num != 0); + + BUG_ON(!RB_EMPTY_ROOT(&ring->persistent_gnts)); + BUG_ON(ring->persistent_gnt_c != 0); } void xen_blkbk_unmap_purged_grants(struct work_struct *work) @@ -354,7 +316,8 @@ void xen_blkbk_unmap_purged_grants(struct work_struct *work) if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST) { unmap_data.count = segs_to_unmap; BUG_ON(gnttab_unmap_refs_sync(&unmap_data)); - put_free_pages(ring, pages, segs_to_unmap); + gnttab_page_cache_put(&ring->free_pages, pages, + segs_to_unmap); segs_to_unmap = 0; } kfree(persistent_gnt); @@ -362,7 +325,7 @@ void xen_blkbk_unmap_purged_grants(struct work_struct *work) if (segs_to_unmap > 0) { unmap_data.count = segs_to_unmap; BUG_ON(gnttab_unmap_refs_sync(&unmap_data)); - put_free_pages(ring, pages, segs_to_unmap); + gnttab_page_cache_put(&ring->free_pages, pages, segs_to_unmap); } } @@ -371,26 +334,25 @@ static void purge_persistent_gnt(struct xen_blkif_ring *ring) struct persistent_gnt *persistent_gnt; struct rb_node *n; unsigned int num_clean, total; - bool scan_used = false, clean_used = false; + bool scan_used = false; struct rb_root *root; - if (ring->persistent_gnt_c < xen_blkif_max_pgrants || - (ring->persistent_gnt_c == xen_blkif_max_pgrants && - !ring->blkif->vbd.overflow_max_grants)) { - goto out; - } - if (work_busy(&ring->persistent_purge_work)) { pr_alert_ratelimited("Scheduled work from previous purge is still busy, cannot purge list\n"); goto out; } - num_clean = (xen_blkif_max_pgrants / 100) * LRU_PERCENT_CLEAN; - num_clean = ring->persistent_gnt_c - xen_blkif_max_pgrants + num_clean; - num_clean = min(ring->persistent_gnt_c, num_clean); - if ((num_clean == 0) || - (num_clean > (ring->persistent_gnt_c - atomic_read(&ring->persistent_gnt_in_use)))) - goto out; + if (ring->persistent_gnt_c < max_pgrants || + (ring->persistent_gnt_c == max_pgrants && + !ring->blkif->vbd.overflow_max_grants)) { + num_clean = 0; + } else { + num_clean = (max_pgrants / 100) * LRU_PERCENT_CLEAN; + num_clean = ring->persistent_gnt_c - max_pgrants + num_clean; + num_clean = min(ring->persistent_gnt_c, num_clean); + pr_debug("Going to purge at least %u persistent grants\n", + num_clean); + } /* * At this point, we can assure that there will be no calls @@ -401,9 +363,7 @@ static void purge_persistent_gnt(struct xen_blkif_ring *ring) * number of grants. */ - total = num_clean; - - pr_debug("Going to purge %u persistent grants\n", num_clean); + total = 0; BUG_ON(!list_empty(&ring->persistent_purge_list)); root = &ring->persistent_gnts; @@ -412,46 +372,37 @@ purge_list: BUG_ON(persistent_gnt->handle == BLKBACK_INVALID_HANDLE); - if (clean_used) { - clear_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags); + if (persistent_gnt->active) continue; - } - - if (test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags)) + if (!scan_used && !persistent_gnt_timeout(persistent_gnt)) continue; - if (!scan_used && - (test_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags))) + if (scan_used && total >= num_clean) continue; rb_erase(&persistent_gnt->node, root); list_add(&persistent_gnt->remove_node, &ring->persistent_purge_list); - if (--num_clean == 0) - goto finished; + total++; } /* - * If we get here it means we also need to start cleaning + * Check whether we also need to start cleaning * grants that were used since last purge in order to cope * with the requested num */ - if (!scan_used && !clean_used) { - pr_debug("Still missing %u purged frames\n", num_clean); + if (!scan_used && total < num_clean) { + pr_debug("Still missing %u purged frames\n", num_clean - total); scan_used = true; goto purge_list; } -finished: - if (!clean_used) { - pr_debug("Finished scanning for grants to clean, removing used flag\n"); - clean_used = true; - goto purge_list; - } - ring->persistent_gnt_c -= (total - num_clean); - ring->blkif->vbd.overflow_max_grants = 0; + if (total) { + ring->persistent_gnt_c -= total; + ring->blkif->vbd.overflow_max_grants = 0; - /* We can defer this work */ - schedule_work(&ring->persistent_purge_work); - pr_debug("Purged %u/%u\n", (total - num_clean), total); + /* We can defer this work */ + schedule_work(&ring->persistent_purge_work); + pr_debug("Purged %u/%u\n", num_clean, total); + } out: return; @@ -496,7 +447,7 @@ static void free_req(struct xen_blkif_ring *ring, struct pending_req *req) * Routines for managing virtual block devices (vbds). */ static int xen_vbd_translate(struct phys_req *req, struct xen_blkif *blkif, - int operation) + enum req_op operation) { struct xen_vbd *vbd = &blkif->vbd; int rc = -EACCES; @@ -514,7 +465,7 @@ static int xen_vbd_translate(struct phys_req *req, struct xen_blkif *blkif, } req->dev = vbd->pdevice; - req->bdev = vbd->bdev; + req->bdev = file_bdev(vbd->bdev_file); rc = 0; out: @@ -592,9 +543,8 @@ static void print_stats(struct xen_blkif_ring *ring) current->comm, ring->st_oo_req, ring->st_rd_req, ring->st_wr_req, ring->st_f_req, ring->st_ds_req, - ring->persistent_gnt_c, - xen_blkif_max_pgrants); - ring->st_print = jiffies + msecs_to_jiffies(10 * 1000); + ring->persistent_gnt_c, max_pgrants); + ring->st_print = jiffies + secs_to_jiffies(10); ring->st_rd_req = 0; ring->st_wr_req = 0; ring->st_oo_req = 0; @@ -608,6 +558,8 @@ int xen_blkif_schedule(void *arg) struct xen_vbd *vbd = &blkif->vbd; unsigned long timeout; int ret; + bool do_eoi; + unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS; set_freezable(); while (!kthread_should_stop()) { @@ -632,16 +584,23 @@ int xen_blkif_schedule(void *arg) if (timeout == 0) goto purge_gnt_list; + do_eoi = ring->waiting_reqs; + ring->waiting_reqs = 0; smp_mb(); /* clear flag *before* checking for work */ - ret = do_block_io_op(ring); + ret = do_block_io_op(ring, &eoi_flags); if (ret > 0) ring->waiting_reqs = 1; if (ret == -EACCES) wait_event_interruptible(ring->shutdown_wq, kthread_should_stop()); + if (do_eoi && !ring->waiting_reqs) { + xen_irq_lateeoi(ring->irq, eoi_flags); + eoi_flags |= XEN_EOI_FLAG_SPURIOUS; + } + purge_gnt_list: if (blkif->vbd.feature_gnt_persistent && time_after(jiffies, ring->next_lru)) { @@ -649,8 +608,12 @@ purge_gnt_list: ring->next_lru = jiffies + msecs_to_jiffies(LRU_INTERVAL); } - /* Shrink if we have more than xen_blkif_max_buffer_pages */ - shrink_free_pagepool(ring, xen_blkif_max_buffer_pages); + /* Shrink the free pages pool if it is too large. */ + if (time_before(jiffies, blkif->buffer_squeeze_end)) + gnttab_page_cache_shrink(&ring->free_pages, 0); + else + gnttab_page_cache_shrink(&ring->free_pages, + max_buffer_pages); if (log_stats && time_after(jiffies, ring->st_print)) print_stats(ring); @@ -673,15 +636,10 @@ purge_gnt_list: void xen_blkbk_free_caches(struct xen_blkif_ring *ring) { /* Free all persistent grant pages */ - if (!RB_EMPTY_ROOT(&ring->persistent_gnts)) - free_persistent_gnts(ring, &ring->persistent_gnts, - ring->persistent_gnt_c); - - BUG_ON(!RB_EMPTY_ROOT(&ring->persistent_gnts)); - ring->persistent_gnt_c = 0; + free_persistent_gnts(ring); /* Since we are shutting down remove all pages from the buffer */ - shrink_free_pagepool(ring, 0 /* All */); + gnttab_page_cache_shrink(&ring->free_pages, 0 /* All */); } static unsigned int xen_blkbk_unmap_prepare( @@ -705,9 +663,9 @@ static unsigned int xen_blkbk_unmap_prepare( GNTMAP_host_map, pages[i]->handle); pages[i]->handle = BLKBACK_INVALID_HANDLE; invcount++; - } + } - return invcount; + return invcount; } static void xen_blkbk_unmap_and_respond_callback(int result, struct gntab_unmap_queue_data *data) @@ -720,7 +678,7 @@ static void xen_blkbk_unmap_and_respond_callback(int result, struct gntab_unmap_ but is this the best way to deal with this? */ BUG_ON(result); - put_free_pages(ring, data->pages, data->count); + gnttab_page_cache_put(&ring->free_pages, data->pages, data->count); make_response(ring, pending_req->id, pending_req->operation, pending_req->status); free_req(ring, pending_req); @@ -787,7 +745,8 @@ static void xen_blkbk_unmap(struct xen_blkif_ring *ring, if (invcount) { ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, invcount); BUG_ON(ret); - put_free_pages(ring, unmap_pages, invcount); + gnttab_page_cache_put(&ring->free_pages, unmap_pages, + invcount); } pages += batch; num -= batch; @@ -834,8 +793,14 @@ again: pages[i]->page = persistent_gnt->page; pages[i]->persistent_gnt = persistent_gnt; } else { - if (get_free_page(ring, &pages[i]->page)) - goto out_of_memory; + if (gnttab_page_cache_get(&ring->free_pages, + &pages[i]->page)) { + gnttab_page_cache_put(&ring->free_pages, + pages_to_gnt, + segs_to_map); + ret = -ENOMEM; + goto out; + } addr = vaddr(pages[i]->page); pages_to_gnt[segs_to_map] = pages[i]->page; pages[i]->persistent_gnt = NULL; @@ -851,10 +816,8 @@ again: break; } - if (segs_to_map) { + if (segs_to_map) ret = gnttab_map_refs(map, NULL, pages_to_gnt, segs_to_map); - BUG_ON(ret); - } /* * Now swizzle the MFN in our domain with the MFN from the other domain @@ -867,9 +830,10 @@ again: BUG_ON(new_map_idx >= segs_to_map); if (unlikely(map[new_map_idx].status != 0)) { pr_debug("invalid buffer -- could not remap it\n"); - put_free_pages(ring, &pages[seg_idx]->page, 1); + gnttab_page_cache_put(&ring->free_pages, + &pages[seg_idx]->page, 1); pages[seg_idx]->handle = BLKBACK_INVALID_HANDLE; - ret |= 1; + ret |= !ret; goto next; } pages[seg_idx]->handle = map[new_map_idx].handle; @@ -877,7 +841,7 @@ again: continue; } if (use_persistent_gnts && - ring->persistent_gnt_c < xen_blkif_max_pgrants) { + ring->persistent_gnt_c < max_pgrants) { /* * We are using persistent grants, the grant is * not mapped but we might have room for it. @@ -904,7 +868,7 @@ again: pages[seg_idx]->persistent_gnt = persistent_gnt; pr_debug("grant %u added to the tree of persistent grants, using %u/%u\n", persistent_gnt->gnt, ring->persistent_gnt_c, - xen_blkif_max_pgrants); + max_pgrants); goto next; } if (use_persistent_gnts && !blkif->vbd.overflow_max_grants) { @@ -921,15 +885,18 @@ next: } segs_to_map = 0; last_map = map_until; - if (map_until != num) + if (!ret && map_until != num) goto again; - return ret; +out: + for (i = last_map; i < num; i++) { + /* Don't zap current batch's valid persistent grants. */ + if (i >= map_until) + pages[i]->persistent_gnt = NULL; + pages[i]->handle = BLKBACK_INVALID_HANDLE; + } -out_of_memory: - pr_alert("%s: out of memory\n", __func__); - put_free_pages(ring, pages_to_gnt, segs_to_map); - return -ENOMEM; + return ret; } static int xen_blkbk_map_seg(struct pending_req *pending_req) @@ -964,7 +931,7 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req, if (rc) goto unmap; - for (n = 0, i = 0; n < nseg; n++) { + for (n = 0; n < nseg; n++) { uint8_t first_sect, last_sect; if ((n % SEGS_PER_INDIRECT_FRAME) == 0) { @@ -1002,8 +969,7 @@ static int dispatch_discard_io(struct xen_blkif_ring *ring, int err = 0; int status = BLKIF_RSP_OKAY; struct xen_blkif *blkif = ring->blkif; - struct block_device *bdev = blkif->vbd.bdev; - unsigned long secure; + struct block_device *bdev = file_bdev(blkif->vbd.bdev_file); struct phys_req preq; xen_blkif_get(blkif); @@ -1020,13 +986,15 @@ static int dispatch_discard_io(struct xen_blkif_ring *ring, } ring->st_ds_req++; - secure = (blkif->vbd.discard_secure && - (req->u.discard.flag & BLKIF_DISCARD_SECURE)) ? - BLKDEV_DISCARD_SECURE : 0; + if (blkif->vbd.discard_secure && + (req->u.discard.flag & BLKIF_DISCARD_SECURE)) + err = blkdev_issue_secure_erase(bdev, + req->u.discard.sector_number, + req->u.discard.nr_sectors, GFP_KERNEL); + else + err = blkdev_issue_discard(bdev, req->u.discard.sector_number, + req->u.discard.nr_sectors, GFP_KERNEL); - err = blkdev_issue_discard(bdev, req->u.discard.sector_number, - req->u.discard.nr_sectors, - GFP_KERNEL, secure); fail_response: if (err == -EOPNOTSUPP) { pr_debug("discard op failed, not supported\n"); @@ -1104,7 +1072,111 @@ static void end_block_io_op(struct bio *bio) bio_put(bio); } +static void blkif_get_x86_32_req(struct blkif_request *dst, + const struct blkif_x86_32_request *src) +{ + unsigned int i, n; + + dst->operation = READ_ONCE(src->operation); + + switch (dst->operation) { + case BLKIF_OP_READ: + case BLKIF_OP_WRITE: + case BLKIF_OP_WRITE_BARRIER: + case BLKIF_OP_FLUSH_DISKCACHE: + dst->u.rw.nr_segments = READ_ONCE(src->u.rw.nr_segments); + dst->u.rw.handle = src->u.rw.handle; + dst->u.rw.id = src->u.rw.id; + dst->u.rw.sector_number = src->u.rw.sector_number; + n = min_t(unsigned int, BLKIF_MAX_SEGMENTS_PER_REQUEST, + dst->u.rw.nr_segments); + for (i = 0; i < n; i++) + dst->u.rw.seg[i] = src->u.rw.seg[i]; + break; + + case BLKIF_OP_DISCARD: + dst->u.discard.flag = src->u.discard.flag; + dst->u.discard.id = src->u.discard.id; + dst->u.discard.sector_number = src->u.discard.sector_number; + dst->u.discard.nr_sectors = src->u.discard.nr_sectors; + break; + + case BLKIF_OP_INDIRECT: + dst->u.indirect.indirect_op = src->u.indirect.indirect_op; + dst->u.indirect.nr_segments = + READ_ONCE(src->u.indirect.nr_segments); + dst->u.indirect.handle = src->u.indirect.handle; + dst->u.indirect.id = src->u.indirect.id; + dst->u.indirect.sector_number = src->u.indirect.sector_number; + n = min(MAX_INDIRECT_PAGES, + INDIRECT_PAGES(dst->u.indirect.nr_segments)); + for (i = 0; i < n; i++) + dst->u.indirect.indirect_grefs[i] = + src->u.indirect.indirect_grefs[i]; + break; + + default: + /* + * Don't know how to translate this op. Only get the + * ID so failure can be reported to the frontend. + */ + dst->u.other.id = src->u.other.id; + break; + } +} + +static void blkif_get_x86_64_req(struct blkif_request *dst, + const struct blkif_x86_64_request *src) +{ + unsigned int i, n; + + dst->operation = READ_ONCE(src->operation); + + switch (dst->operation) { + case BLKIF_OP_READ: + case BLKIF_OP_WRITE: + case BLKIF_OP_WRITE_BARRIER: + case BLKIF_OP_FLUSH_DISKCACHE: + dst->u.rw.nr_segments = READ_ONCE(src->u.rw.nr_segments); + dst->u.rw.handle = src->u.rw.handle; + dst->u.rw.id = src->u.rw.id; + dst->u.rw.sector_number = src->u.rw.sector_number; + n = min_t(unsigned int, BLKIF_MAX_SEGMENTS_PER_REQUEST, + dst->u.rw.nr_segments); + for (i = 0; i < n; i++) + dst->u.rw.seg[i] = src->u.rw.seg[i]; + break; + + case BLKIF_OP_DISCARD: + dst->u.discard.flag = src->u.discard.flag; + dst->u.discard.id = src->u.discard.id; + dst->u.discard.sector_number = src->u.discard.sector_number; + dst->u.discard.nr_sectors = src->u.discard.nr_sectors; + break; + case BLKIF_OP_INDIRECT: + dst->u.indirect.indirect_op = src->u.indirect.indirect_op; + dst->u.indirect.nr_segments = + READ_ONCE(src->u.indirect.nr_segments); + dst->u.indirect.handle = src->u.indirect.handle; + dst->u.indirect.id = src->u.indirect.id; + dst->u.indirect.sector_number = src->u.indirect.sector_number; + n = min(MAX_INDIRECT_PAGES, + INDIRECT_PAGES(dst->u.indirect.nr_segments)); + for (i = 0; i < n; i++) + dst->u.indirect.indirect_grefs[i] = + src->u.indirect.indirect_grefs[i]; + break; + + default: + /* + * Don't know how to translate this op. Only get the + * ID so failure can be reported to the frontend. + */ + dst->u.other.id = src->u.other.id; + break; + } +} /* * Function to copy the from the ring buffer the 'struct blkif_request' @@ -1112,7 +1184,7 @@ static void end_block_io_op(struct bio *bio) * and transmute it to the block API to hand it over to the proper block disk. */ static int -__do_block_io_op(struct xen_blkif_ring *ring) +__do_block_io_op(struct xen_blkif_ring *ring, unsigned int *eoi_flags) { union blkif_back_rings *blk_rings = &ring->blk_rings; struct blkif_request req; @@ -1135,6 +1207,9 @@ __do_block_io_op(struct xen_blkif_ring *ring) if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc)) break; + /* We've seen a request, so clear spurious eoi flag. */ + *eoi_flags &= ~XEN_EOI_FLAG_SPURIOUS; + if (kthread_should_stop()) { more_to_do = 1; break; @@ -1193,13 +1268,13 @@ done: } static int -do_block_io_op(struct xen_blkif_ring *ring) +do_block_io_op(struct xen_blkif_ring *ring, unsigned int *eoi_flags) { union blkif_back_rings *blk_rings = &ring->blk_rings; int more_to_do; do { - more_to_do = __do_block_io_op(ring); + more_to_do = __do_block_io_op(ring, eoi_flags); if (more_to_do) break; @@ -1222,8 +1297,8 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring, struct bio *bio = NULL; struct bio **biolist = pending_req->biolist; int i, nbio = 0; - int operation; - int operation_flags = 0; + enum req_op operation; + blk_opf_t operation_flags = 0; struct blk_plug plug; bool drain = false; struct grant_page **pages = pending_req->segments; @@ -1251,6 +1326,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring, break; case BLKIF_OP_WRITE_BARRIER: drain = true; + fallthrough; case BLKIF_OP_FLUSH_DISKCACHE: ring->st_f_req++; operation = REQ_OP_WRITE; @@ -1355,18 +1431,13 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring, pages[i]->page, seg[i].nsec << 9, seg[i].offset) == 0)) { - - int nr_iovecs = min_t(int, (nseg-i), BIO_MAX_PAGES); - bio = bio_alloc(GFP_KERNEL, nr_iovecs); - if (unlikely(bio == NULL)) - goto fail_put_bio; - + bio = bio_alloc(preq.bdev, bio_max_segs(nseg - i), + operation | operation_flags, + GFP_KERNEL); biolist[nbio++] = bio; - bio->bi_bdev = preq.bdev; bio->bi_private = pending_req; bio->bi_end_io = end_block_io_op; bio->bi_iter.bi_sector = preq.sector_number; - bio_set_op_attrs(bio, operation, operation_flags); } preq.sector_number += seg[i].nsec; @@ -1376,15 +1447,11 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring, if (!bio) { BUG_ON(operation_flags != REQ_PREFLUSH); - bio = bio_alloc(GFP_KERNEL, 0); - if (unlikely(bio == NULL)) - goto fail_put_bio; - + bio = bio_alloc(preq.bdev, 0, operation | operation_flags, + GFP_KERNEL); biolist[nbio++] = bio; - bio->bi_bdev = preq.bdev; bio->bi_private = pending_req; bio->bi_end_io = end_block_io_op; - bio_set_op_attrs(bio, operation, operation_flags); } atomic_set(&pending_req->pendcnt, nbio); @@ -1412,14 +1479,6 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring, free_req(ring, pending_req); msleep(1); /* back off a bit */ return -EIO; - - fail_put_bio: - for (i = 0; i < nbio; i++) - bio_put(biolist[i]); - atomic_set(&pending_req->pendcnt, 1); - __end_block_io_op(pending_req, BLK_STS_RESOURCE); - msleep(1); /* back off a bit */ - return -EIO; } @@ -1496,5 +1555,14 @@ static int __init xen_blkif_init(void) module_init(xen_blkif_init); +static void __exit xen_blkif_fini(void) +{ + xen_blkif_xenbus_fini(); + xen_blkif_interface_fini(); +} + +module_exit(xen_blkif_fini); + +MODULE_DESCRIPTION("Virtual block device back-end driver"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_ALIAS("xen-backend:vbd"); |
