diff options
Diffstat (limited to 'drivers/block/xen-blkfront.c')
| -rw-r--r-- | drivers/block/xen-blkfront.c | 360 |
1 files changed, 188 insertions, 172 deletions
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index ccd0dd0c6b83..04fc6b552c04 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -152,6 +152,10 @@ static unsigned int xen_blkif_max_ring_order; module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, 0444); MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring"); +static bool __read_mostly xen_blkif_trusted = true; +module_param_named(trusted, xen_blkif_trusted, bool, 0644); +MODULE_PARM_DESC(trusted, "Is the backend trusted"); + #define BLK_RING_SIZE(info) \ __CONST_RING_SIZE(blkif, XEN_PAGE_SIZE * (info)->nr_ring_pages) @@ -209,7 +213,11 @@ struct blkfront_info unsigned int feature_fua:1; unsigned int feature_discard:1; unsigned int feature_secdiscard:1; + /* Connect-time cached feature_persistent parameter */ + unsigned int feature_persistent_parm:1; + /* Persistent grants feature negotiation result */ unsigned int feature_persistent:1; + unsigned int bounce:1; unsigned int discard_granularity; unsigned int discard_alignment; /* Number of 4KB segments handled */ @@ -229,8 +237,6 @@ static unsigned int nr_minors; static unsigned long *minors; static DEFINE_SPINLOCK(minor_lock); -#define GRANT_INVALID_REF 0 - #define PARTS_PER_DISK 16 #define PARTS_PER_EXT_DISK 256 @@ -312,8 +318,8 @@ static int fill_grant_buffer(struct blkfront_ring_info *rinfo, int num) if (!gnt_list_entry) goto out_of_memory; - if (info->feature_persistent) { - granted_page = alloc_page(GFP_NOIO); + if (info->bounce) { + granted_page = alloc_page(GFP_NOIO | __GFP_ZERO); if (!granted_page) { kfree(gnt_list_entry); goto out_of_memory; @@ -321,7 +327,7 @@ static int fill_grant_buffer(struct blkfront_ring_info *rinfo, int num) gnt_list_entry->page = granted_page; } - gnt_list_entry->gref = GRANT_INVALID_REF; + gnt_list_entry->gref = INVALID_GRANT_REF; list_add(&gnt_list_entry->node, &rinfo->grants); i++; } @@ -332,7 +338,7 @@ out_of_memory: 
list_for_each_entry_safe(gnt_list_entry, n, &rinfo->grants, node) { list_del(&gnt_list_entry->node); - if (info->feature_persistent) + if (info->bounce) __free_page(gnt_list_entry->page); kfree(gnt_list_entry); i--; @@ -350,7 +356,7 @@ static struct grant *get_free_grant(struct blkfront_ring_info *rinfo) node); list_del(&gnt_list_entry->node); - if (gnt_list_entry->gref != GRANT_INVALID_REF) + if (gnt_list_entry->gref != INVALID_GRANT_REF) rinfo->persistent_gnts_c--; return gnt_list_entry; @@ -372,13 +378,13 @@ static struct grant *get_grant(grant_ref_t *gref_head, struct grant *gnt_list_entry = get_free_grant(rinfo); struct blkfront_info *info = rinfo->dev_info; - if (gnt_list_entry->gref != GRANT_INVALID_REF) + if (gnt_list_entry->gref != INVALID_GRANT_REF) return gnt_list_entry; /* Assign a gref to this page */ gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head); BUG_ON(gnt_list_entry->gref == -ENOSPC); - if (info->feature_persistent) + if (info->bounce) grant_foreign_access(gnt_list_entry, info); else { /* Grant access to the GFN passed by the caller */ @@ -396,13 +402,13 @@ static struct grant *get_indirect_grant(grant_ref_t *gref_head, struct grant *gnt_list_entry = get_free_grant(rinfo); struct blkfront_info *info = rinfo->dev_info; - if (gnt_list_entry->gref != GRANT_INVALID_REF) + if (gnt_list_entry->gref != INVALID_GRANT_REF) return gnt_list_entry; /* Assign a gref to this page */ gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head); BUG_ON(gnt_list_entry->gref == -ENOSPC); - if (!info->feature_persistent) { + if (!info->bounce) { struct page *indirect_page; /* Fetch a pre-allocated page to use for indirect grefs */ @@ -487,11 +493,11 @@ static void blkif_restart_queue_callback(void *arg) schedule_work(&rinfo->work); } -static int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg) +static int blkif_getgeo(struct gendisk *disk, struct hd_geometry *hg) { /* We don't have real geometry info, but let's at least return values 
consistent with the size of the device */ - sector_t nsect = get_capacity(bd->bd_disk); + sector_t nsect = get_capacity(disk); sector_t cylinders = nsect; hg->heads = 0xff; @@ -503,7 +509,7 @@ static int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg) return 0; } -static int blkif_ioctl(struct block_device *bdev, fmode_t mode, +static int blkif_ioctl(struct block_device *bdev, blk_mode_t mode, unsigned command, unsigned long argument) { struct blkfront_info *info = bdev->bd_disk->private_data; @@ -576,7 +582,7 @@ struct setup_rw_req { struct blkif_request *ring_req; grant_ref_t gref_head; unsigned int id; - /* Only used when persistent grant is used and it's a read request */ + /* Only used when persistent grant is used and it's a write request */ bool need_copy; unsigned int bvec_off; char *bvec_data; @@ -705,7 +711,7 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri .grant_idx = 0, .segments = NULL, .rinfo = rinfo, - .need_copy = rq_data_dir(req) && info->feature_persistent, + .need_copy = rq_data_dir(req) && info->bounce, }; /* @@ -745,7 +751,7 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri id = blkif_ring_get_request(rinfo, req, &final_ring_req); ring_req = &rinfo->shadow[id].req; - num_sg = blk_rq_map_sg(req->q, req, rinfo->shadow[id].sg); + num_sg = blk_rq_map_sg(req, rinfo->shadow[id].sg); num_grant = 0; /* Calculate the number of grant used */ for_each_sg(rinfo->shadow[id].sg, sg, num_sg, i) @@ -774,13 +780,19 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri ring_req->u.rw.handle = info->handle; ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE : BLKIF_OP_READ; - if (req_op(req) == REQ_OP_FLUSH || req->cmd_flags & REQ_FUA) { + if (req_op(req) == REQ_OP_FLUSH || + (req_op(req) == REQ_OP_WRITE && (req->cmd_flags & REQ_FUA))) { /* * Ideally we can do an unordered flush-to-disk. * In case the backend onlysupports barriers, use that. 
* A barrier request a superset of FUA, so we can * implement it the same way. (It's also a FLUSH+FUA, * since it is guaranteed ordered WRT previous writes.) + * + * Note that we can end up here with a FUA write and the + * flags cleared. This happens when the flag was + * run-time disabled after a failing I/O, and we'll + * simply submit it as a normal write. */ if (info->feature_flush && info->feature_fua) ring_req->operation = @@ -788,8 +800,6 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri else if (info->feature_flush) ring_req->operation = BLKIF_OP_FLUSH_DISKCACHE; - else - ring_req->operation = 0; } ring_req->u.rw.nr_segments = num_grant; if (unlikely(require_extra_req)) { @@ -880,16 +890,6 @@ static inline void flush_requests(struct blkfront_ring_info *rinfo) notify_remote_via_irq(rinfo->irq); } -static inline bool blkif_request_flush_invalid(struct request *req, - struct blkfront_info *info) -{ - return (blk_rq_is_passthrough(req) || - ((req_op(req) == REQ_OP_FLUSH) && - !info->feature_flush) || - ((req->cmd_flags & REQ_FUA) && - !info->feature_fua)); -} - static blk_status_t blkif_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *qd) { @@ -901,12 +901,22 @@ static blk_status_t blkif_queue_rq(struct blk_mq_hw_ctx *hctx, rinfo = get_rinfo(info, qid); blk_mq_start_request(qd->rq); spin_lock_irqsave(&rinfo->ring_lock, flags); - if (RING_FULL(&rinfo->ring)) - goto out_busy; - if (blkif_request_flush_invalid(qd->rq, rinfo->dev_info)) - goto out_err; + /* + * Check if the backend actually supports flushes. + * + * While the block layer won't send us flushes if we don't claim to + * support them, the Xen protocol allows the backend to revoke support + * at any time. That is of course a really bad idea and dangerous, but + * has been allowed for 10+ years. In that case we simply clear the + * flags, and directly return here for an empty flush and ignore the + * FUA flag later on. 
+ */ + if (unlikely(req_op(qd->rq) == REQ_OP_FLUSH && !info->feature_flush)) + goto complete; + if (RING_FULL(&rinfo->ring)) + goto out_busy; if (blkif_queue_request(qd->rq, rinfo)) goto out_busy; @@ -914,14 +924,14 @@ static blk_status_t blkif_queue_rq(struct blk_mq_hw_ctx *hctx, spin_unlock_irqrestore(&rinfo->ring_lock, flags); return BLK_STS_OK; -out_err: - spin_unlock_irqrestore(&rinfo->ring_lock, flags); - return BLK_STS_IOERR; - out_busy: blk_mq_stop_hw_queue(hctx); spin_unlock_irqrestore(&rinfo->ring_lock, flags); return BLK_STS_DEV_RESOURCE; +complete: + spin_unlock_irqrestore(&rinfo->ring_lock, flags); + blk_mq_end_request(qd->rq, BLK_STS_OK); + return BLK_STS_OK; } static void blkif_complete_rq(struct request *rq) @@ -934,39 +944,41 @@ static const struct blk_mq_ops blkfront_mq_ops = { .complete = blkif_complete_rq, }; -static void blkif_set_queue_limits(struct blkfront_info *info) +static void blkif_set_queue_limits(const struct blkfront_info *info, + struct queue_limits *lim) { - struct request_queue *rq = info->rq; - struct gendisk *gd = info->gd; unsigned int segments = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST; - blk_queue_flag_set(QUEUE_FLAG_VIRT, rq); - if (info->feature_discard) { - blk_queue_flag_set(QUEUE_FLAG_DISCARD, rq); - blk_queue_max_discard_sectors(rq, get_capacity(gd)); - rq->limits.discard_granularity = info->discard_granularity ?: - info->physical_sector_size; - rq->limits.discard_alignment = info->discard_alignment; + lim->max_hw_discard_sectors = UINT_MAX; + if (info->discard_granularity) + lim->discard_granularity = info->discard_granularity; + lim->discard_alignment = info->discard_alignment; if (info->feature_secdiscard) - blk_queue_flag_set(QUEUE_FLAG_SECERASE, rq); + lim->max_secure_erase_sectors = UINT_MAX; + } + + if (info->feature_flush) { + lim->features |= BLK_FEAT_WRITE_CACHE; + if (info->feature_fua) + lim->features |= BLK_FEAT_FUA; } /* Hard sector size and max sectors impersonate the equiv. 
hardware. */ - blk_queue_logical_block_size(rq, info->sector_size); - blk_queue_physical_block_size(rq, info->physical_sector_size); - blk_queue_max_hw_sectors(rq, (segments * XEN_PAGE_SIZE) / 512); + lim->logical_block_size = info->sector_size; + lim->physical_block_size = info->physical_sector_size; + lim->max_hw_sectors = (segments * XEN_PAGE_SIZE) / 512; /* Each segment in a request is up to an aligned page in size. */ - blk_queue_segment_boundary(rq, PAGE_SIZE - 1); - blk_queue_max_segment_size(rq, PAGE_SIZE); + lim->seg_boundary_mask = PAGE_SIZE - 1; + lim->max_segment_size = PAGE_SIZE; /* Ensure a merged request will fit in a single I/O ring slot. */ - blk_queue_max_segments(rq, segments / GRANTS_PER_PSEG); + lim->max_segments = segments / GRANTS_PER_PSEG; /* Make sure buffer addresses are sector-aligned. */ - blk_queue_dma_alignment(rq, 511); + lim->dma_alignment = 511; } static const char *flush_info(struct blkfront_info *info) @@ -981,13 +993,12 @@ static const char *flush_info(struct blkfront_info *info) static void xlvbd_flush(struct blkfront_info *info) { - blk_queue_write_cache(info->rq, info->feature_flush ? true : false, - info->feature_fua ? true : false); - pr_info("blkfront: %s: %s %s %s %s %s\n", + pr_info("blkfront: %s: %s %s %s %s %s %s %s\n", info->gd->disk_name, flush_info(info), "persistent grants:", info->feature_persistent ? "enabled;" : "disabled;", "indirect descriptors:", - info->max_indirect_segments ? "enabled;" : "disabled;"); + info->max_indirect_segments ? "enabled;" : "disabled;", + "bounce buffer:", info->bounce ? 
"enabled" : "disabled;"); } static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset) @@ -1059,9 +1070,9 @@ static char *encode_disk_name(char *ptr, unsigned int n) } static int xlvbd_alloc_gendisk(blkif_sector_t capacity, - struct blkfront_info *info, u16 sector_size, - unsigned int physical_sector_size) + struct blkfront_info *info) { + struct queue_limits lim = {}; struct gendisk *gd; int nr_minors = 1; int err; @@ -1120,7 +1131,6 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, } else info->tag_set.queue_depth = BLK_RING_SIZE(info); info->tag_set.numa_node = NUMA_NO_NODE; - info->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; info->tag_set.cmd_size = sizeof(struct blkif_req); info->tag_set.driver_data = info; @@ -1128,7 +1138,8 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, if (err) goto out_release_minors; - gd = blk_mq_alloc_disk(&info->tag_set, info); + blkif_set_queue_limits(info, &lim); + gd = blk_mq_alloc_disk(&info->tag_set, &lim, info); if (IS_ERR(gd)) { err = PTR_ERR(gd); goto out_free_tag_set; @@ -1152,9 +1163,6 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, info->rq = gd->queue; info->gd = gd; - info->sector_size = sector_size; - info->physical_sector_size = physical_sector_size; - blkif_set_queue_limits(info); xlvbd_flush(info); @@ -1209,7 +1217,7 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo) if (!list_empty(&rinfo->indirect_pages)) { struct page *indirect_page, *n; - BUG_ON(info->feature_persistent); + BUG_ON(info->bounce); list_for_each_entry_safe(indirect_page, n, &rinfo->indirect_pages, lru) { list_del(&indirect_page->lru); __free_page(indirect_page); @@ -1221,12 +1229,12 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo) list_for_each_entry_safe(persistent_gnt, n, &rinfo->grants, node) { list_del(&persistent_gnt->node); - if (persistent_gnt->gref != GRANT_INVALID_REF) { + if (persistent_gnt->gref != INVALID_GRANT_REF) { gnttab_end_foreign_access(persistent_gnt->gref, 
- 0, 0UL); + NULL); rinfo->persistent_gnts_c--; } - if (info->feature_persistent) + if (info->bounce) __free_page(persistent_gnt->page); kfree(persistent_gnt); } @@ -1246,8 +1254,8 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo) rinfo->shadow[i].req.u.rw.nr_segments; for (j = 0; j < segs; j++) { persistent_gnt = rinfo->shadow[i].grants_used[j]; - gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); - if (info->feature_persistent) + gnttab_end_foreign_access(persistent_gnt->gref, NULL); + if (info->bounce) __free_page(persistent_gnt->page); kfree(persistent_gnt); } @@ -1261,7 +1269,7 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo) for (j = 0; j < INDIRECT_GREFS(segs); j++) { persistent_gnt = rinfo->shadow[i].indirect_grants[j]; - gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); + gnttab_end_foreign_access(persistent_gnt->gref, NULL); __free_page(persistent_gnt->page); kfree(persistent_gnt); } @@ -1282,14 +1290,8 @@ free_shadow: flush_work(&rinfo->work); /* Free resources associated with old device channel. */ - for (i = 0; i < info->nr_ring_pages; i++) { - if (rinfo->ring_ref[i] != GRANT_INVALID_REF) { - gnttab_end_foreign_access(rinfo->ring_ref[i], 0, 0); - rinfo->ring_ref[i] = GRANT_INVALID_REF; - } - } - free_pages((unsigned long)rinfo->ring.sring, get_order(info->nr_ring_pages * XEN_PAGE_SIZE)); - rinfo->ring.sring = NULL; + xenbus_teardown_ring((void **)&rinfo->ring.sring, info->nr_ring_pages, + rinfo->ring_ref); if (rinfo->irq) unbind_from_irqhandler(rinfo->irq, rinfo); @@ -1372,9 +1374,15 @@ static int blkif_get_final_status(enum blk_req_status s1, return BLKIF_RSP_OKAY; } -static bool blkif_completion(unsigned long *id, - struct blkfront_ring_info *rinfo, - struct blkif_response *bret) +/* + * Return values: + * 1 response processed. + * 0 missing further responses. + * -1 error while processing. 
+ */ +static int blkif_completion(unsigned long *id, + struct blkfront_ring_info *rinfo, + struct blkif_response *bret) { int i = 0; struct scatterlist *sg; @@ -1397,7 +1405,7 @@ static bool blkif_completion(unsigned long *id, /* Wait the second response if not yet here. */ if (s2->status < REQ_DONE) - return false; + return 0; bret->status = blkif_get_final_status(s->status, s2->status); @@ -1430,7 +1438,7 @@ static bool blkif_completion(unsigned long *id, data.s = s; num_sg = s->num_sg; - if (bret->operation == BLKIF_OP_READ && info->feature_persistent) { + if (bret->operation == BLKIF_OP_READ && info->bounce) { for_each_sg(s->sg, sg, num_sg, i) { BUG_ON(sg->offset + sg->length > PAGE_SIZE); @@ -1448,57 +1456,58 @@ static bool blkif_completion(unsigned long *id, } /* Add the persistent grant into the list of free grants */ for (i = 0; i < num_grant; i++) { - if (gnttab_query_foreign_access(s->grants_used[i]->gref)) { + if (!gnttab_try_end_foreign_access(s->grants_used[i]->gref)) { /* * If the grant is still mapped by the backend (the * backend has chosen to make this grant persistent) * we add it at the head of the list, so it will be * reused first. */ - if (!info->feature_persistent) - pr_alert_ratelimited("backed has not unmapped grant: %u\n", - s->grants_used[i]->gref); + if (!info->feature_persistent) { + pr_alert("backed has not unmapped grant: %u\n", + s->grants_used[i]->gref); + return -1; + } list_add(&s->grants_used[i]->node, &rinfo->grants); rinfo->persistent_gnts_c++; } else { /* - * If the grant is not mapped by the backend we end the - * foreign access and add it to the tail of the list, - * so it will not be picked again unless we run out of - * persistent grants. + * If the grant is not mapped by the backend we add it + * to the tail of the list, so it will not be picked + * again unless we run out of persistent grants. 
*/ - gnttab_end_foreign_access(s->grants_used[i]->gref, 0, 0UL); - s->grants_used[i]->gref = GRANT_INVALID_REF; + s->grants_used[i]->gref = INVALID_GRANT_REF; list_add_tail(&s->grants_used[i]->node, &rinfo->grants); } } if (s->req.operation == BLKIF_OP_INDIRECT) { for (i = 0; i < INDIRECT_GREFS(num_grant); i++) { - if (gnttab_query_foreign_access(s->indirect_grants[i]->gref)) { - if (!info->feature_persistent) - pr_alert_ratelimited("backed has not unmapped grant: %u\n", - s->indirect_grants[i]->gref); + if (!gnttab_try_end_foreign_access(s->indirect_grants[i]->gref)) { + if (!info->feature_persistent) { + pr_alert("backed has not unmapped grant: %u\n", + s->indirect_grants[i]->gref); + return -1; + } list_add(&s->indirect_grants[i]->node, &rinfo->grants); rinfo->persistent_gnts_c++; } else { struct page *indirect_page; - gnttab_end_foreign_access(s->indirect_grants[i]->gref, 0, 0UL); /* * Add the used indirect page back to the list of * available pages for indirect grefs. */ - if (!info->feature_persistent) { + if (!info->bounce) { indirect_page = s->indirect_grants[i]->page; list_add(&indirect_page->lru, &rinfo->indirect_pages); } - s->indirect_grants[i]->gref = GRANT_INVALID_REF; + s->indirect_grants[i]->gref = INVALID_GRANT_REF; list_add_tail(&s->indirect_grants[i]->node, &rinfo->grants); } } } - return true; + return 1; } static irqreturn_t blkif_interrupt(int irq, void *dev_id) @@ -1564,12 +1573,17 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) } if (bret.operation != BLKIF_OP_DISCARD) { + int ret; + /* * We may need to wait for an extra response if the * I/O request is split in 2 */ - if (!blkif_completion(&id, rinfo, &bret)) + ret = blkif_completion(&id, rinfo, &bret); + if (!ret) continue; + if (unlikely(ret < 0)) + goto err; } if (add_id_to_freelist(rinfo, id)) { @@ -1593,8 +1607,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) blkif_req(req)->error = BLK_STS_NOTSUPP; info->feature_discard = 0; info->feature_secdiscard = 0; - 
blk_queue_flag_clear(QUEUE_FLAG_DISCARD, rq); - blk_queue_flag_clear(QUEUE_FLAG_SECERASE, rq); + blk_queue_disable_discard(rq); + blk_queue_disable_secure_erase(rq); } break; case BLKIF_OP_FLUSH_DISKCACHE: @@ -1615,7 +1629,6 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) blkif_req(req)->error = BLK_STS_OK; info->feature_fua = 0; info->feature_flush = 0; - xlvbd_flush(info); } fallthrough; case BLKIF_OP_READ: @@ -1668,31 +1681,16 @@ static int setup_blkring(struct xenbus_device *dev, struct blkfront_ring_info *rinfo) { struct blkif_sring *sring; - int err, i; + int err; struct blkfront_info *info = rinfo->dev_info; unsigned long ring_size = info->nr_ring_pages * XEN_PAGE_SIZE; - grant_ref_t gref[XENBUS_MAX_RING_GRANTS]; - - for (i = 0; i < info->nr_ring_pages; i++) - rinfo->ring_ref[i] = GRANT_INVALID_REF; - sring = (struct blkif_sring *)__get_free_pages(GFP_NOIO | __GFP_HIGH, - get_order(ring_size)); - if (!sring) { - xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring"); - return -ENOMEM; - } - SHARED_RING_INIT(sring); - FRONT_RING_INIT(&rinfo->ring, sring, ring_size); - - err = xenbus_grant_ring(dev, rinfo->ring.sring, info->nr_ring_pages, gref); - if (err < 0) { - free_pages((unsigned long)sring, get_order(ring_size)); - rinfo->ring.sring = NULL; + err = xenbus_setup_ring(dev, GFP_NOIO, (void **)&sring, + info->nr_ring_pages, rinfo->ring_ref); + if (err) goto fail; - } - for (i = 0; i < info->nr_ring_pages; i++) - rinfo->ring_ref[i] = gref[i]; + + XEN_FRONT_RING_INIT(&rinfo->ring, sring, ring_size); err = xenbus_alloc_evtchn(dev, &rinfo->evtchn); if (err) @@ -1761,6 +1759,12 @@ abort_transaction: return err; } +/* Enable the persistent grants feature. */ +static bool feature_persistent = true; +module_param(feature_persistent, bool, 0644); +MODULE_PARM_DESC(feature_persistent, + "Enables the persistent grants feature"); + /* Common code used when first setting up, and when resuming. 
*/ static int talk_to_blkback(struct xenbus_device *dev, struct blkfront_info *info) @@ -1775,6 +1779,10 @@ static int talk_to_blkback(struct xenbus_device *dev, if (!info) return -ENODEV; + /* Check if backend is trusted. */ + info->bounce = !xen_blkif_trusted || + !xenbus_read_unsigned(dev->nodename, "trusted", 1); + max_page_order = xenbus_read_unsigned(info->xbdev->otherend, "max-ring-page-order", 0); ring_page_order = min(xen_blkif_max_ring_order, max_page_order); @@ -1848,8 +1856,9 @@ again: message = "writing protocol"; goto abort_transaction; } + info->feature_persistent_parm = feature_persistent; err = xenbus_printf(xbt, dev->nodename, "feature-persistent", "%u", - info->feature_persistent); + info->feature_persistent_parm); if (err) dev_warn(&dev->dev, "writing persistent grants feature to xenbus"); @@ -1917,12 +1926,6 @@ static int negotiate_mq(struct blkfront_info *info) return 0; } -/* Enable the persistent grants feature. */ -static bool feature_persistent = true; -module_param(feature_persistent, bool, 0644); -MODULE_PARM_DESC(feature_persistent, - "Enables the persistent grants feature"); - /* * Entry point to this code when a new device is created. Allocate the basic * structures and the ring buffer for communication with the backend, and @@ -1989,8 +1992,6 @@ static int blkfront_probe(struct xenbus_device *dev, info->vdevice = vdevice; info->connected = BLKIF_STATE_DISCONNECTED; - info->feature_persistent = feature_persistent; - /* Front end dir is a number, which is used as the id. 
*/ info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0); dev_set_drvdata(&dev->dev, info); @@ -2004,18 +2005,19 @@ static int blkfront_probe(struct xenbus_device *dev, static int blkif_recover(struct blkfront_info *info) { + struct queue_limits lim; unsigned int r_index; struct request *req, *n; int rc; struct bio *bio; - unsigned int segs; struct blkfront_ring_info *rinfo; + lim = queue_limits_start_update(info->rq); blkfront_gather_backend_features(info); - /* Reset limits changed by blk_mq_update_nr_hw_queues(). */ - blkif_set_queue_limits(info); - segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST; - blk_queue_max_segments(info->rq, segs / GRANTS_PER_PSEG); + blkif_set_queue_limits(info, &lim); + rc = queue_limits_commit_update(info->rq, &lim); + if (rc) + return rc; for_each_rinfo(info, rinfo, r_index) { rc = blkfront_setup_indirect(rinfo); @@ -2035,7 +2037,9 @@ static int blkif_recover(struct blkfront_info *info) list_for_each_entry_safe(req, n, &info->requests, queuelist) { /* Requeue pending requests (flush or discard) */ list_del_init(&req->queuelist); - BUG_ON(req->nr_phys_segments > segs); + BUG_ON(req->nr_phys_segments > + (info->max_indirect_segments ? : + BLKIF_MAX_SEGMENTS_PER_REQUEST)); blk_mq_requeue_request(req, false); } blk_mq_start_stopped_hw_queues(info->rq, true); @@ -2125,9 +2129,10 @@ static void blkfront_closing(struct blkfront_info *info) return; /* No more blkif_request(). */ - blk_mq_stop_hw_queues(info->rq); - blk_set_queue_dying(info->rq); - set_capacity(info->gd, 0); + if (info->rq && info->gd) { + blk_mq_stop_hw_queues(info->rq); + blk_mark_disk_dead(info->gd); + } for_each_rinfo(info, rinfo, i) { /* No more gnttab callback work. 
*/ @@ -2182,17 +2187,18 @@ static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo) if (err) goto out_of_memory; - if (!info->feature_persistent && info->max_indirect_segments) { + if (!info->bounce && info->max_indirect_segments) { /* - * We are using indirect descriptors but not persistent - * grants, we need to allocate a set of pages that can be + * We are using indirect descriptors but don't have a bounce + * buffer, we need to allocate a set of pages that can be * used for mapping indirect grefs */ int num = INDIRECT_GREFS(grants) * BLK_RING_SIZE(info); BUG_ON(!list_empty(&rinfo->indirect_pages)); for (i = 0; i < num; i++) { - struct page *indirect_page = alloc_page(GFP_KERNEL); + struct page *indirect_page = alloc_page(GFP_KERNEL | + __GFP_ZERO); if (!indirect_page) goto out_of_memory; list_add(&indirect_page->lru, &rinfo->indirect_pages); @@ -2281,10 +2287,12 @@ static void blkfront_gather_backend_features(struct blkfront_info *info) if (xenbus_read_unsigned(info->xbdev->otherend, "feature-discard", 0)) blkfront_setup_discard(info); - if (info->feature_persistent) + if (info->feature_persistent_parm) info->feature_persistent = !!xenbus_read_unsigned(info->xbdev->otherend, "feature-persistent", 0); + if (info->feature_persistent) + info->bounce = true; indirect_segments = xenbus_read_unsigned(info->xbdev->otherend, "feature-max-indirect-segments", 0); @@ -2308,8 +2316,6 @@ static void blkfront_gather_backend_features(struct blkfront_info *info) static void blkfront_connect(struct blkfront_info *info) { unsigned long long sectors; - unsigned long sector_size; - unsigned int physical_sector_size; int err, i; struct blkfront_ring_info *rinfo; @@ -2348,7 +2354,7 @@ static void blkfront_connect(struct blkfront_info *info) err = xenbus_gather(XBT_NIL, info->xbdev->otherend, "sectors", "%llu", &sectors, "info", "%u", &info->vdisk_info, - "sector-size", "%lu", &sector_size, + "sector-size", "%lu", &info->sector_size, NULL); if (err) { 
xenbus_dev_fatal(info->xbdev, err, @@ -2362,9 +2368,9 @@ static void blkfront_connect(struct blkfront_info *info) * provide this. Assume physical sector size to be the same as * sector_size in that case. */ - physical_sector_size = xenbus_read_unsigned(info->xbdev->otherend, + info->physical_sector_size = xenbus_read_unsigned(info->xbdev->otherend, "physical-sector-size", - sector_size); + info->sector_size); blkfront_gather_backend_features(info); for_each_rinfo(info, rinfo, i) { err = blkfront_setup_indirect(rinfo); @@ -2376,8 +2382,7 @@ static void blkfront_connect(struct blkfront_info *info) } } - err = xlvbd_alloc_gendisk(sectors, info, sector_size, - physical_sector_size); + err = xlvbd_alloc_gendisk(sectors, info); if (err) { xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", info->xbdev->otherend); @@ -2393,7 +2398,7 @@ static void blkfront_connect(struct blkfront_info *info) err = device_add_disk(&info->xbdev->dev, info->gd, NULL); if (err) { - blk_cleanup_disk(info->gd); + put_disk(info->gd); blk_mq_free_tag_set(&info->tag_set); info->rq = NULL; goto fail; @@ -2462,25 +2467,27 @@ static void blkback_changed(struct xenbus_device *dev, } } -static int blkfront_remove(struct xenbus_device *xbdev) +static void blkfront_remove(struct xenbus_device *xbdev) { struct blkfront_info *info = dev_get_drvdata(&xbdev->dev); dev_dbg(&xbdev->dev, "%s removed", xbdev->nodename); - del_gendisk(info->gd); + if (info->gd) + del_gendisk(info->gd); mutex_lock(&blkfront_mutex); list_del(&info->info_list); mutex_unlock(&blkfront_mutex); blkif_free(info, 0); - xlbd_release_minors(info->gd->first_minor, info->gd->minors); - blk_cleanup_disk(info->gd); - blk_mq_free_tag_set(&info->tag_set); + if (info->gd) { + xlbd_release_minors(info->gd->first_minor, info->gd->minors); + put_disk(info->gd); + blk_mq_free_tag_set(&info->tag_set); + } kfree(info); - return 0; } static int blkfront_is_ready(struct xenbus_device *dev) @@ -2521,6 +2528,7 @@ static void 
purge_persistent_grants(struct blkfront_info *info) for_each_rinfo(info, rinfo, i) { struct grant *gnt_list_entry, *tmp; + LIST_HEAD(grants); spin_lock_irqsave(&rinfo->ring_lock, flags); @@ -2531,17 +2539,18 @@ static void purge_persistent_grants(struct blkfront_info *info) list_for_each_entry_safe(gnt_list_entry, tmp, &rinfo->grants, node) { - if (gnt_list_entry->gref == GRANT_INVALID_REF || - gnttab_query_foreign_access(gnt_list_entry->gref)) + if (gnt_list_entry->gref == INVALID_GRANT_REF || + !gnttab_try_end_foreign_access(gnt_list_entry->gref)) continue; list_del(&gnt_list_entry->node); - gnttab_end_foreign_access(gnt_list_entry->gref, 0, 0UL); rinfo->persistent_gnts_c--; - gnt_list_entry->gref = GRANT_INVALID_REF; - list_add_tail(&gnt_list_entry->node, &rinfo->grants); + gnt_list_entry->gref = INVALID_GRANT_REF; + list_add_tail(&gnt_list_entry->node, &grants); } + list_splice_tail(&grants, &rinfo->grants); + spin_unlock_irqrestore(&rinfo->ring_lock, flags); } } @@ -2551,6 +2560,13 @@ static void blkfront_delay_work(struct work_struct *work) struct blkfront_info *info; bool need_schedule_work = false; + /* + * Note that when using bounce buffers but not persistent grants + * there's no need to run blkfront_delay_work because grants are + * revoked in blkif_completion or else an error is reported and the + * connection is closed. + */ + mutex_lock(&blkfront_mutex); list_for_each_entry(info, &info_list, info_list) { |
