Diffstat (limited to 'drivers/block/xen-blkfront.c')
-rw-r--r--	drivers/block/xen-blkfront.c	360
1 file changed, 188 insertions(+), 172 deletions(-)
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index ccd0dd0c6b83..04fc6b552c04 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -152,6 +152,10 @@ static unsigned int xen_blkif_max_ring_order;
module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, 0444);
MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring");
+static bool __read_mostly xen_blkif_trusted = true;
+module_param_named(trusted, xen_blkif_trusted, bool, 0644);
+MODULE_PARM_DESC(trusted, "Is the backend trusted");
+
#define BLK_RING_SIZE(info) \
__CONST_RING_SIZE(blkif, XEN_PAGE_SIZE * (info)->nr_ring_pages)
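The new "trusted" knob is runtime-writable (mode 0644) and gets combined with a per-device xenbus override at connect time (see the talk_to_blkback hunk below). A minimal sketch of that combination, using a hypothetical helper name:

	/* Hypothetical helper: trust the backend only if both the
	 * module-wide knob and the per-device xenbus node agree; the
	 * xenbus "trusted" node defaults to 1 (trusted) when absent. */
	static bool backend_is_trusted(struct xenbus_device *dev)
	{
		return xen_blkif_trusted &&
		       xenbus_read_unsigned(dev->nodename, "trusted", 1);
	}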
@@ -209,7 +213,11 @@ struct blkfront_info
unsigned int feature_fua:1;
unsigned int feature_discard:1;
unsigned int feature_secdiscard:1;
+ /* Connect-time cached feature_persistent parameter */
+ unsigned int feature_persistent_parm:1;
+ /* Persistent grants feature negotiation result */
unsigned int feature_persistent:1;
+ unsigned int bounce:1;
unsigned int discard_granularity;
unsigned int discard_alignment;
/* Number of 4KB segments handled */
@@ -229,8 +237,6 @@ static unsigned int nr_minors;
static unsigned long *minors;
static DEFINE_SPINLOCK(minor_lock);
-#define GRANT_INVALID_REF 0
-
#define PARTS_PER_DISK 16
#define PARTS_PER_EXT_DISK 256
@@ -312,8 +318,8 @@ static int fill_grant_buffer(struct blkfront_ring_info *rinfo, int num)
if (!gnt_list_entry)
goto out_of_memory;
- if (info->feature_persistent) {
- granted_page = alloc_page(GFP_NOIO);
+ if (info->bounce) {
+ granted_page = alloc_page(GFP_NOIO | __GFP_ZERO);
if (!granted_page) {
kfree(gnt_list_entry);
goto out_of_memory;
@@ -321,7 +327,7 @@ static int fill_grant_buffer(struct blkfront_ring_info *rinfo, int num)
gnt_list_entry->page = granted_page;
}
- gnt_list_entry->gref = GRANT_INVALID_REF;
+ gnt_list_entry->gref = INVALID_GRANT_REF;
list_add(&gnt_list_entry->node, &rinfo->grants);
i++;
}
@@ -332,7 +338,7 @@ out_of_memory:
list_for_each_entry_safe(gnt_list_entry, n,
&rinfo->grants, node) {
list_del(&gnt_list_entry->node);
- if (info->feature_persistent)
+ if (info->bounce)
__free_page(gnt_list_entry->page);
kfree(gnt_list_entry);
i--;
@@ -350,7 +356,7 @@ static struct grant *get_free_grant(struct blkfront_ring_info *rinfo)
node);
list_del(&gnt_list_entry->node);
- if (gnt_list_entry->gref != GRANT_INVALID_REF)
+ if (gnt_list_entry->gref != INVALID_GRANT_REF)
rinfo->persistent_gnts_c--;
return gnt_list_entry;
@@ -372,13 +378,13 @@ static struct grant *get_grant(grant_ref_t *gref_head,
struct grant *gnt_list_entry = get_free_grant(rinfo);
struct blkfront_info *info = rinfo->dev_info;
- if (gnt_list_entry->gref != GRANT_INVALID_REF)
+ if (gnt_list_entry->gref != INVALID_GRANT_REF)
return gnt_list_entry;
/* Assign a gref to this page */
gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head);
BUG_ON(gnt_list_entry->gref == -ENOSPC);
- if (info->feature_persistent)
+ if (info->bounce)
grant_foreign_access(gnt_list_entry, info);
else {
/* Grant access to the GFN passed by the caller */
@@ -396,13 +402,13 @@ static struct grant *get_indirect_grant(grant_ref_t *gref_head,
struct grant *gnt_list_entry = get_free_grant(rinfo);
struct blkfront_info *info = rinfo->dev_info;
- if (gnt_list_entry->gref != GRANT_INVALID_REF)
+ if (gnt_list_entry->gref != INVALID_GRANT_REF)
return gnt_list_entry;
/* Assign a gref to this page */
gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head);
BUG_ON(gnt_list_entry->gref == -ENOSPC);
- if (!info->feature_persistent) {
+ if (!info->bounce) {
struct page *indirect_page;
/* Fetch a pre-allocated page to use for indirect grefs */
@@ -487,11 +493,11 @@ static void blkif_restart_queue_callback(void *arg)
schedule_work(&rinfo->work);
}
-static int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg)
+static int blkif_getgeo(struct gendisk *disk, struct hd_geometry *hg)
{
/* We don't have real geometry info, but let's at least return
values consistent with the size of the device */
- sector_t nsect = get_capacity(bd->bd_disk);
+ sector_t nsect = get_capacity(disk);
sector_t cylinders = nsect;
hg->heads = 0xff;
@@ -503,7 +509,7 @@ static int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg)
return 0;
}
-static int blkif_ioctl(struct block_device *bdev, fmode_t mode,
+static int blkif_ioctl(struct block_device *bdev, blk_mode_t mode,
unsigned command, unsigned long argument)
{
struct blkfront_info *info = bdev->bd_disk->private_data;
@@ -576,7 +582,7 @@ struct setup_rw_req {
struct blkif_request *ring_req;
grant_ref_t gref_head;
unsigned int id;
- /* Only used when persistent grant is used and it's a read request */
+ /* Only used when persistent grant is used and it's a write request */
bool need_copy;
unsigned int bvec_off;
char *bvec_data;
@@ -705,7 +711,7 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
.grant_idx = 0,
.segments = NULL,
.rinfo = rinfo,
- .need_copy = rq_data_dir(req) && info->feature_persistent,
+ .need_copy = rq_data_dir(req) && info->bounce,
};
/*
@@ -745,7 +751,7 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
id = blkif_ring_get_request(rinfo, req, &final_ring_req);
ring_req = &rinfo->shadow[id].req;
- num_sg = blk_rq_map_sg(req->q, req, rinfo->shadow[id].sg);
+ num_sg = blk_rq_map_sg(req, rinfo->shadow[id].sg);
num_grant = 0;
/* Calculate the number of grant used */
for_each_sg(rinfo->shadow[id].sg, sg, num_sg, i)
@@ -774,13 +780,19 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
ring_req->u.rw.handle = info->handle;
ring_req->operation = rq_data_dir(req) ?
BLKIF_OP_WRITE : BLKIF_OP_READ;
- if (req_op(req) == REQ_OP_FLUSH || req->cmd_flags & REQ_FUA) {
+ if (req_op(req) == REQ_OP_FLUSH ||
+ (req_op(req) == REQ_OP_WRITE && (req->cmd_flags & REQ_FUA))) {
/*
* Ideally we can do an unordered flush-to-disk.
* In case the backend only supports barriers, use that.
* A barrier request is a superset of FUA, so we can
* implement it the same way. (It's also a FLUSH+FUA,
* since it is guaranteed ordered WRT previous writes.)
+ *
+ * Note that we can end up here with a FUA write and the
+ * flags cleared. This happens when the flag was
+ * run-time disabled after a failing I/O, and we'll
+ * simply submit it as a normal write.
*/
if (info->feature_flush && info->feature_fua)
ring_req->operation =
@@ -788,8 +800,6 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
else if (info->feature_flush)
ring_req->operation =
BLKIF_OP_FLUSH_DISKCACHE;
- else
- ring_req->operation = 0;
}
ring_req->u.rw.nr_segments = num_grant;
if (unlikely(require_extra_req)) {
@@ -880,16 +890,6 @@ static inline void flush_requests(struct blkfront_ring_info *rinfo)
notify_remote_via_irq(rinfo->irq);
}
-static inline bool blkif_request_flush_invalid(struct request *req,
- struct blkfront_info *info)
-{
- return (blk_rq_is_passthrough(req) ||
- ((req_op(req) == REQ_OP_FLUSH) &&
- !info->feature_flush) ||
- ((req->cmd_flags & REQ_FUA) &&
- !info->feature_fua));
-}
-
static blk_status_t blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *qd)
{
@@ -901,12 +901,22 @@ static blk_status_t blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
rinfo = get_rinfo(info, qid);
blk_mq_start_request(qd->rq);
spin_lock_irqsave(&rinfo->ring_lock, flags);
- if (RING_FULL(&rinfo->ring))
- goto out_busy;
- if (blkif_request_flush_invalid(qd->rq, rinfo->dev_info))
- goto out_err;
+ /*
+ * Check if the backend actually supports flushes.
+ *
+ * While the block layer won't send us flushes if we don't claim to
+ * support them, the Xen protocol allows the backend to revoke support
+ * at any time. That is of course a really bad idea and dangerous, but
+ * has been allowed for 10+ years. In that case we simply clear the
+ * flags, and directly return here for an empty flush and ignore the
+ * FUA flag later on.
+ */
+ if (unlikely(req_op(qd->rq) == REQ_OP_FLUSH && !info->feature_flush))
+ goto complete;
+ if (RING_FULL(&rinfo->ring))
+ goto out_busy;
if (blkif_queue_request(qd->rq, rinfo))
goto out_busy;
@@ -914,14 +924,14 @@ static blk_status_t blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
spin_unlock_irqrestore(&rinfo->ring_lock, flags);
return BLK_STS_OK;
-out_err:
- spin_unlock_irqrestore(&rinfo->ring_lock, flags);
- return BLK_STS_IOERR;
-
out_busy:
blk_mq_stop_hw_queue(hctx);
spin_unlock_irqrestore(&rinfo->ring_lock, flags);
return BLK_STS_DEV_RESOURCE;
+complete:
+ spin_unlock_irqrestore(&rinfo->ring_lock, flags);
+ blk_mq_end_request(qd->rq, BLK_STS_OK);
+ return BLK_STS_OK;
}
static void blkif_complete_rq(struct request *rq)
@@ -934,39 +944,41 @@ static const struct blk_mq_ops blkfront_mq_ops = {
.complete = blkif_complete_rq,
};
-static void blkif_set_queue_limits(struct blkfront_info *info)
+static void blkif_set_queue_limits(const struct blkfront_info *info,
+ struct queue_limits *lim)
{
- struct request_queue *rq = info->rq;
- struct gendisk *gd = info->gd;
unsigned int segments = info->max_indirect_segments ? :
BLKIF_MAX_SEGMENTS_PER_REQUEST;
- blk_queue_flag_set(QUEUE_FLAG_VIRT, rq);
-
if (info->feature_discard) {
- blk_queue_flag_set(QUEUE_FLAG_DISCARD, rq);
- blk_queue_max_discard_sectors(rq, get_capacity(gd));
- rq->limits.discard_granularity = info->discard_granularity ?:
- info->physical_sector_size;
- rq->limits.discard_alignment = info->discard_alignment;
+ lim->max_hw_discard_sectors = UINT_MAX;
+ if (info->discard_granularity)
+ lim->discard_granularity = info->discard_granularity;
+ lim->discard_alignment = info->discard_alignment;
if (info->feature_secdiscard)
- blk_queue_flag_set(QUEUE_FLAG_SECERASE, rq);
+ lim->max_secure_erase_sectors = UINT_MAX;
+ }
+
+ if (info->feature_flush) {
+ lim->features |= BLK_FEAT_WRITE_CACHE;
+ if (info->feature_fua)
+ lim->features |= BLK_FEAT_FUA;
}
/* Hard sector size and max sectors impersonate the equiv. hardware. */
- blk_queue_logical_block_size(rq, info->sector_size);
- blk_queue_physical_block_size(rq, info->physical_sector_size);
- blk_queue_max_hw_sectors(rq, (segments * XEN_PAGE_SIZE) / 512);
+ lim->logical_block_size = info->sector_size;
+ lim->physical_block_size = info->physical_sector_size;
+ lim->max_hw_sectors = (segments * XEN_PAGE_SIZE) / 512;
/* Each segment in a request is up to an aligned page in size. */
- blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
- blk_queue_max_segment_size(rq, PAGE_SIZE);
+ lim->seg_boundary_mask = PAGE_SIZE - 1;
+ lim->max_segment_size = PAGE_SIZE;
/* Ensure a merged request will fit in a single I/O ring slot. */
- blk_queue_max_segments(rq, segments / GRANTS_PER_PSEG);
+ lim->max_segments = segments / GRANTS_PER_PSEG;
/* Make sure buffer addresses are sector-aligned. */
- blk_queue_dma_alignment(rq, 511);
+ lim->dma_alignment = 511;
}
static const char *flush_info(struct blkfront_info *info)
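The rework above replaces the individual blk_queue_* setters with the queue_limits API: limits are staged in a caller-owned structure and either handed to blk_mq_alloc_disk() at disk creation or committed as one validated transaction on a live queue (as blkif_recover does further down). A minimal sketch of the live-update pattern, assuming a hypothetical driver with request queue "q":

	/* Sketch: stage a limit locally, then validate and publish it
	 * atomically; queue_limits_start_update() takes the queue's
	 * limits lock and queue_limits_commit_update() releases it. */
	static int example_update_limits(struct request_queue *q)
	{
		struct queue_limits lim;

		lim = queue_limits_start_update(q);
		lim.max_segments = 8;	/* staged, not yet visible */
		return queue_limits_commit_update(q, &lim);
	}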
@@ -981,13 +993,12 @@ static const char *flush_info(struct blkfront_info *info)
static void xlvbd_flush(struct blkfront_info *info)
{
- blk_queue_write_cache(info->rq, info->feature_flush ? true : false,
- info->feature_fua ? true : false);
- pr_info("blkfront: %s: %s %s %s %s %s\n",
+ pr_info("blkfront: %s: %s %s %s %s %s %s %s\n",
info->gd->disk_name, flush_info(info),
"persistent grants:", info->feature_persistent ?
"enabled;" : "disabled;", "indirect descriptors:",
- info->max_indirect_segments ? "enabled;" : "disabled;");
+ info->max_indirect_segments ? "enabled;" : "disabled;",
+ "bounce buffer:", info->bounce ? "enabled" : "disabled;");
}
static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
@@ -1059,9 +1070,9 @@ static char *encode_disk_name(char *ptr, unsigned int n)
}
static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
- struct blkfront_info *info, u16 sector_size,
- unsigned int physical_sector_size)
+ struct blkfront_info *info)
{
+ struct queue_limits lim = {};
struct gendisk *gd;
int nr_minors = 1;
int err;
@@ -1120,7 +1131,6 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
} else
info->tag_set.queue_depth = BLK_RING_SIZE(info);
info->tag_set.numa_node = NUMA_NO_NODE;
- info->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
info->tag_set.cmd_size = sizeof(struct blkif_req);
info->tag_set.driver_data = info;
@@ -1128,7 +1138,8 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
if (err)
goto out_release_minors;
- gd = blk_mq_alloc_disk(&info->tag_set, info);
+ blkif_set_queue_limits(info, &lim);
+ gd = blk_mq_alloc_disk(&info->tag_set, &lim, info);
if (IS_ERR(gd)) {
err = PTR_ERR(gd);
goto out_free_tag_set;
@@ -1152,9 +1163,6 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
info->rq = gd->queue;
info->gd = gd;
- info->sector_size = sector_size;
- info->physical_sector_size = physical_sector_size;
- blkif_set_queue_limits(info);
xlvbd_flush(info);
@@ -1209,7 +1217,7 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo)
if (!list_empty(&rinfo->indirect_pages)) {
struct page *indirect_page, *n;
- BUG_ON(info->feature_persistent);
+ BUG_ON(info->bounce);
list_for_each_entry_safe(indirect_page, n, &rinfo->indirect_pages, lru) {
list_del(&indirect_page->lru);
__free_page(indirect_page);
@@ -1221,12 +1229,12 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo)
list_for_each_entry_safe(persistent_gnt, n,
&rinfo->grants, node) {
list_del(&persistent_gnt->node);
- if (persistent_gnt->gref != GRANT_INVALID_REF) {
+ if (persistent_gnt->gref != INVALID_GRANT_REF) {
gnttab_end_foreign_access(persistent_gnt->gref,
- 0, 0UL);
+ NULL);
rinfo->persistent_gnts_c--;
}
- if (info->feature_persistent)
+ if (info->bounce)
__free_page(persistent_gnt->page);
kfree(persistent_gnt);
}
@@ -1246,8 +1254,8 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo)
rinfo->shadow[i].req.u.rw.nr_segments;
for (j = 0; j < segs; j++) {
persistent_gnt = rinfo->shadow[i].grants_used[j];
- gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
- if (info->feature_persistent)
+ gnttab_end_foreign_access(persistent_gnt->gref, NULL);
+ if (info->bounce)
__free_page(persistent_gnt->page);
kfree(persistent_gnt);
}
@@ -1261,7 +1269,7 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo)
for (j = 0; j < INDIRECT_GREFS(segs); j++) {
persistent_gnt = rinfo->shadow[i].indirect_grants[j];
- gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
+ gnttab_end_foreign_access(persistent_gnt->gref, NULL);
__free_page(persistent_gnt->page);
kfree(persistent_gnt);
}
@@ -1282,14 +1290,8 @@ free_shadow:
flush_work(&rinfo->work);
/* Free resources associated with old device channel. */
- for (i = 0; i < info->nr_ring_pages; i++) {
- if (rinfo->ring_ref[i] != GRANT_INVALID_REF) {
- gnttab_end_foreign_access(rinfo->ring_ref[i], 0, 0);
- rinfo->ring_ref[i] = GRANT_INVALID_REF;
- }
- }
- free_pages((unsigned long)rinfo->ring.sring, get_order(info->nr_ring_pages * XEN_PAGE_SIZE));
- rinfo->ring.sring = NULL;
+ xenbus_teardown_ring((void **)&rinfo->ring.sring, info->nr_ring_pages,
+ rinfo->ring_ref);
if (rinfo->irq)
unbind_from_irqhandler(rinfo->irq, rinfo);
@@ -1372,9 +1374,15 @@ static int blkif_get_final_status(enum blk_req_status s1,
return BLKIF_RSP_OKAY;
}
-static bool blkif_completion(unsigned long *id,
- struct blkfront_ring_info *rinfo,
- struct blkif_response *bret)
+/*
+ * Return values:
+ * 1 response processed.
+ * 0 waiting for further responses.
+ * -1 error while processing.
+ */
+static int blkif_completion(unsigned long *id,
+ struct blkfront_ring_info *rinfo,
+ struct blkif_response *bret)
{
int i = 0;
struct scatterlist *sg;
@@ -1397,7 +1405,7 @@ static bool blkif_completion(unsigned long *id,
/* Wait the second response if not yet here. */
if (s2->status < REQ_DONE)
- return false;
+ return 0;
bret->status = blkif_get_final_status(s->status,
s2->status);
@@ -1430,7 +1438,7 @@ static bool blkif_completion(unsigned long *id,
data.s = s;
num_sg = s->num_sg;
- if (bret->operation == BLKIF_OP_READ && info->feature_persistent) {
+ if (bret->operation == BLKIF_OP_READ && info->bounce) {
for_each_sg(s->sg, sg, num_sg, i) {
BUG_ON(sg->offset + sg->length > PAGE_SIZE);
@@ -1448,57 +1456,58 @@ static bool blkif_completion(unsigned long *id,
}
/* Add the persistent grant into the list of free grants */
for (i = 0; i < num_grant; i++) {
- if (gnttab_query_foreign_access(s->grants_used[i]->gref)) {
+ if (!gnttab_try_end_foreign_access(s->grants_used[i]->gref)) {
/*
* If the grant is still mapped by the backend (the
* backend has chosen to make this grant persistent)
* we add it at the head of the list, so it will be
* reused first.
*/
- if (!info->feature_persistent)
- pr_alert_ratelimited("backed has not unmapped grant: %u\n",
- s->grants_used[i]->gref);
+ if (!info->feature_persistent) {
+ pr_alert("backed has not unmapped grant: %u\n",
+ s->grants_used[i]->gref);
+ return -1;
+ }
list_add(&s->grants_used[i]->node, &rinfo->grants);
rinfo->persistent_gnts_c++;
} else {
/*
- * If the grant is not mapped by the backend we end the
- * foreign access and add it to the tail of the list,
- * so it will not be picked again unless we run out of
- * persistent grants.
+ * If the grant is not mapped by the backend we add it
+ * to the tail of the list, so it will not be picked
+ * again unless we run out of persistent grants.
*/
- gnttab_end_foreign_access(s->grants_used[i]->gref, 0, 0UL);
- s->grants_used[i]->gref = GRANT_INVALID_REF;
+ s->grants_used[i]->gref = INVALID_GRANT_REF;
list_add_tail(&s->grants_used[i]->node, &rinfo->grants);
}
}
if (s->req.operation == BLKIF_OP_INDIRECT) {
for (i = 0; i < INDIRECT_GREFS(num_grant); i++) {
- if (gnttab_query_foreign_access(s->indirect_grants[i]->gref)) {
- if (!info->feature_persistent)
- pr_alert_ratelimited("backend has not unmapped grant: %u\n",
- s->indirect_grants[i]->gref);
+ if (!gnttab_try_end_foreign_access(s->indirect_grants[i]->gref)) {
+ if (!info->feature_persistent) {
+ pr_alert("backed has not unmapped grant: %u\n",
+ s->indirect_grants[i]->gref);
+ return -1;
+ }
list_add(&s->indirect_grants[i]->node, &rinfo->grants);
rinfo->persistent_gnts_c++;
} else {
struct page *indirect_page;
- gnttab_end_foreign_access(s->indirect_grants[i]->gref, 0, 0UL);
/*
* Add the used indirect page back to the list of
* available pages for indirect grefs.
*/
- if (!info->feature_persistent) {
+ if (!info->bounce) {
indirect_page = s->indirect_grants[i]->page;
list_add(&indirect_page->lru, &rinfo->indirect_pages);
}
- s->indirect_grants[i]->gref = GRANT_INVALID_REF;
+ s->indirect_grants[i]->gref = INVALID_GRANT_REF;
list_add_tail(&s->indirect_grants[i]->node, &rinfo->grants);
}
}
}
- return true;
+ return 1;
}
static irqreturn_t blkif_interrupt(int irq, void *dev_id)
@@ -1564,12 +1573,17 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
}
if (bret.operation != BLKIF_OP_DISCARD) {
+ int ret;
+
/*
* We may need to wait for an extra response if the
* I/O request is split in 2
*/
- if (!blkif_completion(&id, rinfo, &bret))
+ ret = blkif_completion(&id, rinfo, &bret);
+ if (!ret)
continue;
+ if (unlikely(ret < 0))
+ goto err;
}
if (add_id_to_freelist(rinfo, id)) {
@@ -1593,8 +1607,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
blkif_req(req)->error = BLK_STS_NOTSUPP;
info->feature_discard = 0;
info->feature_secdiscard = 0;
- blk_queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
- blk_queue_flag_clear(QUEUE_FLAG_SECERASE, rq);
+ blk_queue_disable_discard(rq);
+ blk_queue_disable_secure_erase(rq);
}
break;
case BLKIF_OP_FLUSH_DISKCACHE:
@@ -1615,7 +1629,6 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
blkif_req(req)->error = BLK_STS_OK;
info->feature_fua = 0;
info->feature_flush = 0;
- xlvbd_flush(info);
}
fallthrough;
case BLKIF_OP_READ:
@@ -1668,31 +1681,16 @@ static int setup_blkring(struct xenbus_device *dev,
struct blkfront_ring_info *rinfo)
{
struct blkif_sring *sring;
- int err, i;
+ int err;
struct blkfront_info *info = rinfo->dev_info;
unsigned long ring_size = info->nr_ring_pages * XEN_PAGE_SIZE;
- grant_ref_t gref[XENBUS_MAX_RING_GRANTS];
-
- for (i = 0; i < info->nr_ring_pages; i++)
- rinfo->ring_ref[i] = GRANT_INVALID_REF;
- sring = (struct blkif_sring *)__get_free_pages(GFP_NOIO | __GFP_HIGH,
- get_order(ring_size));
- if (!sring) {
- xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
- return -ENOMEM;
- }
- SHARED_RING_INIT(sring);
- FRONT_RING_INIT(&rinfo->ring, sring, ring_size);
-
- err = xenbus_grant_ring(dev, rinfo->ring.sring, info->nr_ring_pages, gref);
- if (err < 0) {
- free_pages((unsigned long)sring, get_order(ring_size));
- rinfo->ring.sring = NULL;
+ err = xenbus_setup_ring(dev, GFP_NOIO, (void **)&sring,
+ info->nr_ring_pages, rinfo->ring_ref);
+ if (err)
goto fail;
- }
- for (i = 0; i < info->nr_ring_pages; i++)
- rinfo->ring_ref[i] = gref[i];
+
+ XEN_FRONT_RING_INIT(&rinfo->ring, sring, ring_size);
err = xenbus_alloc_evtchn(dev, &rinfo->evtchn);
if (err)
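xenbus_setup_ring() folds the old allocate/init/grant sequence into one helper: it allocates the ring pages already zeroed, grants them to the backend, and fills in the ring_ref array, with xenbus_teardown_ring() as the matching cleanup (used in the blkif_free hunk above). A sketch of the pairing for a hypothetical single-page front ring "ring":

	struct blkif_sring *sring;
	grant_ref_t gref;
	int err;

	/* Allocates one zeroed page and grants it to the backend. */
	err = xenbus_setup_ring(dev, GFP_NOIO, (void **)&sring, 1, &gref);
	if (!err)
		XEN_FRONT_RING_INIT(&ring, sring, XEN_PAGE_SIZE);
	/* ... on shutdown: revoke the grant and free the page ... */
	xenbus_teardown_ring((void **)&sring, 1, &gref);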
@@ -1761,6 +1759,12 @@ abort_transaction:
return err;
}
+/* Enable the persistent grants feature. */
+static bool feature_persistent = true;
+module_param(feature_persistent, bool, 0644);
+MODULE_PARM_DESC(feature_persistent,
+ "Enables the persistent grants feature");
+
/* Common code used when first setting up, and when resuming. */
static int talk_to_blkback(struct xenbus_device *dev,
struct blkfront_info *info)
@@ -1775,6 +1779,10 @@ static int talk_to_blkback(struct xenbus_device *dev,
if (!info)
return -ENODEV;
+ /* Check if backend is trusted. */
+ info->bounce = !xen_blkif_trusted ||
+ !xenbus_read_unsigned(dev->nodename, "trusted", 1);
+
max_page_order = xenbus_read_unsigned(info->xbdev->otherend,
"max-ring-page-order", 0);
ring_page_order = min(xen_blkif_max_ring_order, max_page_order);
@@ -1848,8 +1856,9 @@ again:
message = "writing protocol";
goto abort_transaction;
}
+ info->feature_persistent_parm = feature_persistent;
err = xenbus_printf(xbt, dev->nodename, "feature-persistent", "%u",
- info->feature_persistent);
+ info->feature_persistent_parm);
if (err)
dev_warn(&dev->dev,
"writing persistent grants feature to xenbus");
@@ -1917,12 +1926,6 @@ static int negotiate_mq(struct blkfront_info *info)
return 0;
}
-/* Enable the persistent grants feature. */
-static bool feature_persistent = true;
-module_param(feature_persistent, bool, 0644);
-MODULE_PARM_DESC(feature_persistent,
- "Enables the persistent grants feature");
-
/*
* Entry point to this code when a new device is created. Allocate the basic
* structures and the ring buffer for communication with the backend, and
@@ -1989,8 +1992,6 @@ static int blkfront_probe(struct xenbus_device *dev,
info->vdevice = vdevice;
info->connected = BLKIF_STATE_DISCONNECTED;
- info->feature_persistent = feature_persistent;
-
/* Front end dir is a number, which is used as the id. */
info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
dev_set_drvdata(&dev->dev, info);
@@ -2004,18 +2005,19 @@ static int blkfront_probe(struct xenbus_device *dev,
static int blkif_recover(struct blkfront_info *info)
{
+ struct queue_limits lim;
unsigned int r_index;
struct request *req, *n;
int rc;
struct bio *bio;
- unsigned int segs;
struct blkfront_ring_info *rinfo;
+ lim = queue_limits_start_update(info->rq);
blkfront_gather_backend_features(info);
- /* Reset limits changed by blk_mq_update_nr_hw_queues(). */
- blkif_set_queue_limits(info);
- segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST;
- blk_queue_max_segments(info->rq, segs / GRANTS_PER_PSEG);
+ blkif_set_queue_limits(info, &lim);
+ rc = queue_limits_commit_update(info->rq, &lim);
+ if (rc)
+ return rc;
for_each_rinfo(info, rinfo, r_index) {
rc = blkfront_setup_indirect(rinfo);
@@ -2035,7 +2037,9 @@ static int blkif_recover(struct blkfront_info *info)
list_for_each_entry_safe(req, n, &info->requests, queuelist) {
/* Requeue pending requests (flush or discard) */
list_del_init(&req->queuelist);
- BUG_ON(req->nr_phys_segments > segs);
+ BUG_ON(req->nr_phys_segments >
+ (info->max_indirect_segments ? :
+ BLKIF_MAX_SEGMENTS_PER_REQUEST));
blk_mq_requeue_request(req, false);
}
blk_mq_start_stopped_hw_queues(info->rq, true);
@@ -2125,9 +2129,10 @@ static void blkfront_closing(struct blkfront_info *info)
return;
/* No more blkif_request(). */
- blk_mq_stop_hw_queues(info->rq);
- blk_set_queue_dying(info->rq);
- set_capacity(info->gd, 0);
+ if (info->rq && info->gd) {
+ blk_mq_stop_hw_queues(info->rq);
+ blk_mark_disk_dead(info->gd);
+ }
for_each_rinfo(info, rinfo, i) {
/* No more gnttab callback work. */
@@ -2182,17 +2187,18 @@ static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo)
if (err)
goto out_of_memory;
- if (!info->feature_persistent && info->max_indirect_segments) {
+ if (!info->bounce && info->max_indirect_segments) {
/*
- * We are using indirect descriptors but not persistent
- * grants, we need to allocate a set of pages that can be
+ * We are using indirect descriptors but don't have a bounce
+ * buffer, so we need to allocate a set of pages that can be
+ * used for mapping indirect grefs.
*/
int num = INDIRECT_GREFS(grants) * BLK_RING_SIZE(info);
BUG_ON(!list_empty(&rinfo->indirect_pages));
for (i = 0; i < num; i++) {
- struct page *indirect_page = alloc_page(GFP_KERNEL);
+ struct page *indirect_page = alloc_page(GFP_KERNEL |
+ __GFP_ZERO);
if (!indirect_page)
goto out_of_memory;
list_add(&indirect_page->lru, &rinfo->indirect_pages);
@@ -2281,10 +2287,12 @@ static void blkfront_gather_backend_features(struct blkfront_info *info)
if (xenbus_read_unsigned(info->xbdev->otherend, "feature-discard", 0))
blkfront_setup_discard(info);
- if (info->feature_persistent)
+ if (info->feature_persistent_parm)
info->feature_persistent =
!!xenbus_read_unsigned(info->xbdev->otherend,
"feature-persistent", 0);
+ if (info->feature_persistent)
+ info->bounce = true;
indirect_segments = xenbus_read_unsigned(info->xbdev->otherend,
"feature-max-indirect-segments", 0);
@@ -2308,8 +2316,6 @@ static void blkfront_gather_backend_features(struct blkfront_info *info)
static void blkfront_connect(struct blkfront_info *info)
{
unsigned long long sectors;
- unsigned long sector_size;
- unsigned int physical_sector_size;
int err, i;
struct blkfront_ring_info *rinfo;
@@ -2348,7 +2354,7 @@ static void blkfront_connect(struct blkfront_info *info)
err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
"sectors", "%llu", &sectors,
"info", "%u", &info->vdisk_info,
- "sector-size", "%lu", &sector_size,
+ "sector-size", "%lu", &info->sector_size,
NULL);
if (err) {
xenbus_dev_fatal(info->xbdev, err,
@@ -2362,9 +2368,9 @@ static void blkfront_connect(struct blkfront_info *info)
* provide this. Assume physical sector size to be the same as
* sector_size in that case.
*/
- physical_sector_size = xenbus_read_unsigned(info->xbdev->otherend,
+ info->physical_sector_size = xenbus_read_unsigned(info->xbdev->otherend,
"physical-sector-size",
- sector_size);
+ info->sector_size);
blkfront_gather_backend_features(info);
for_each_rinfo(info, rinfo, i) {
err = blkfront_setup_indirect(rinfo);
@@ -2376,8 +2382,7 @@ static void blkfront_connect(struct blkfront_info *info)
}
}
- err = xlvbd_alloc_gendisk(sectors, info, sector_size,
- physical_sector_size);
+ err = xlvbd_alloc_gendisk(sectors, info);
if (err) {
xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
info->xbdev->otherend);
@@ -2393,7 +2398,7 @@ static void blkfront_connect(struct blkfront_info *info)
err = device_add_disk(&info->xbdev->dev, info->gd, NULL);
if (err) {
- blk_cleanup_disk(info->gd);
+ put_disk(info->gd);
blk_mq_free_tag_set(&info->tag_set);
info->rq = NULL;
goto fail;
@@ -2462,25 +2467,27 @@ static void blkback_changed(struct xenbus_device *dev,
}
}
-static int blkfront_remove(struct xenbus_device *xbdev)
+static void blkfront_remove(struct xenbus_device *xbdev)
{
struct blkfront_info *info = dev_get_drvdata(&xbdev->dev);
dev_dbg(&xbdev->dev, "%s removed", xbdev->nodename);
- del_gendisk(info->gd);
+ if (info->gd)
+ del_gendisk(info->gd);
mutex_lock(&blkfront_mutex);
list_del(&info->info_list);
mutex_unlock(&blkfront_mutex);
blkif_free(info, 0);
- xlbd_release_minors(info->gd->first_minor, info->gd->minors);
- blk_cleanup_disk(info->gd);
- blk_mq_free_tag_set(&info->tag_set);
+ if (info->gd) {
+ xlbd_release_minors(info->gd->first_minor, info->gd->minors);
+ put_disk(info->gd);
+ blk_mq_free_tag_set(&info->tag_set);
+ }
kfree(info);
- return 0;
}
static int blkfront_is_ready(struct xenbus_device *dev)
@@ -2521,6 +2528,7 @@ static void purge_persistent_grants(struct blkfront_info *info)
for_each_rinfo(info, rinfo, i) {
struct grant *gnt_list_entry, *tmp;
+ LIST_HEAD(grants);
spin_lock_irqsave(&rinfo->ring_lock, flags);
@@ -2531,17 +2539,18 @@ static void purge_persistent_grants(struct blkfront_info *info)
list_for_each_entry_safe(gnt_list_entry, tmp, &rinfo->grants,
node) {
- if (gnt_list_entry->gref == GRANT_INVALID_REF ||
- gnttab_query_foreign_access(gnt_list_entry->gref))
+ if (gnt_list_entry->gref == INVALID_GRANT_REF ||
+ !gnttab_try_end_foreign_access(gnt_list_entry->gref))
continue;
list_del(&gnt_list_entry->node);
- gnttab_end_foreign_access(gnt_list_entry->gref, 0, 0UL);
rinfo->persistent_gnts_c--;
- gnt_list_entry->gref = GRANT_INVALID_REF;
- list_add_tail(&gnt_list_entry->node, &rinfo->grants);
+ gnt_list_entry->gref = INVALID_GRANT_REF;
+ list_add_tail(&gnt_list_entry->node, &grants);
}
+ list_splice_tail(&grants, &rinfo->grants);
+
spin_unlock_irqrestore(&rinfo->ring_lock, flags);
}
}
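Collecting the revoked grants on a local list and splicing them back in one step keeps the list_for_each_entry_safe() walk from reaching entries the loop itself just appended to the tail. A generic sketch of the collect-then-splice pattern, with a hypothetical predicate:

	LIST_HEAD(collected);
	struct grant *gnt, *tmp;

	list_for_each_entry_safe(gnt, tmp, &rinfo->grants, node) {
		if (!can_reclaim(gnt))	/* hypothetical predicate */
			continue;
		list_del(&gnt->node);
		list_add_tail(&gnt->node, &collected);
	}
	/* One splice; the walk above never revisits the moved entries. */
	list_splice_tail(&collected, &rinfo->grants);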
@@ -2551,6 +2560,13 @@ static void blkfront_delay_work(struct work_struct *work)
struct blkfront_info *info;
bool need_schedule_work = false;
+ /*
+ * Note that when using bounce buffers but not persistent grants
+ * there's no need to run blkfront_delay_work because grants are
+ * revoked in blkif_completion or else an error is reported and the
+ * connection is closed.
+ */
+
mutex_lock(&blkfront_mutex);
list_for_each_entry(info, &info_list, info_list) {