summaryrefslogtreecommitdiff
path: root/drivers/block/xen-blkfront.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block/xen-blkfront.c')
-rw-r--r--drivers/block/xen-blkfront.c727
1 files changed, 312 insertions, 415 deletions
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 8d49f8fa98bb..04fc6b552c04 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -42,6 +42,7 @@
#include <linux/cdrom.h>
#include <linux/module.h>
#include <linux/slab.h>
+#include <linux/major.h>
#include <linux/mutex.h>
#include <linux/scatterlist.h>
#include <linux/bitmap.h>
@@ -80,6 +81,7 @@ enum blkif_state {
BLKIF_STATE_DISCONNECTED,
BLKIF_STATE_CONNECTED,
BLKIF_STATE_SUSPENDED,
+ BLKIF_STATE_ERROR,
};
struct grant {
@@ -89,6 +91,7 @@ struct grant {
};
enum blk_req_status {
+ REQ_PROCESSING,
REQ_WAITING,
REQ_DONE,
REQ_ERROR,
@@ -149,6 +152,10 @@ static unsigned int xen_blkif_max_ring_order;
module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, 0444);
MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring");
+static bool __read_mostly xen_blkif_trusted = true;
+module_param_named(trusted, xen_blkif_trusted, bool, 0644);
+MODULE_PARM_DESC(trusted, "Is the backend trusted");
+
#define BLK_RING_SIZE(info) \
__CONST_RING_SIZE(blkif, XEN_PAGE_SIZE * (info)->nr_ring_pages)
@@ -195,6 +202,7 @@ struct blkfront_info
struct gendisk *gd;
u16 sector_size;
unsigned int physical_sector_size;
+ unsigned long vdisk_info;
int vdevice;
blkif_vdev_t handle;
enum blkif_state connected;
@@ -205,7 +213,11 @@ struct blkfront_info
unsigned int feature_fua:1;
unsigned int feature_discard:1;
unsigned int feature_secdiscard:1;
+ /* Connect-time cached feature_persistent parameter */
+ unsigned int feature_persistent_parm:1;
+ /* Persistent grants feature negotiation result */
unsigned int feature_persistent:1;
+ unsigned int bounce:1;
unsigned int discard_granularity;
unsigned int discard_alignment;
/* Number of 4KB segments handled */
@@ -225,8 +237,6 @@ static unsigned int nr_minors;
static unsigned long *minors;
static DEFINE_SPINLOCK(minor_lock);
-#define GRANT_INVALID_REF 0
-
#define PARTS_PER_DISK 16
#define PARTS_PER_EXT_DISK 256
@@ -308,8 +318,8 @@ static int fill_grant_buffer(struct blkfront_ring_info *rinfo, int num)
if (!gnt_list_entry)
goto out_of_memory;
- if (info->feature_persistent) {
- granted_page = alloc_page(GFP_NOIO);
+ if (info->bounce) {
+ granted_page = alloc_page(GFP_NOIO | __GFP_ZERO);
if (!granted_page) {
kfree(gnt_list_entry);
goto out_of_memory;
@@ -317,7 +327,7 @@ static int fill_grant_buffer(struct blkfront_ring_info *rinfo, int num)
gnt_list_entry->page = granted_page;
}
- gnt_list_entry->gref = GRANT_INVALID_REF;
+ gnt_list_entry->gref = INVALID_GRANT_REF;
list_add(&gnt_list_entry->node, &rinfo->grants);
i++;
}
@@ -328,7 +338,7 @@ out_of_memory:
list_for_each_entry_safe(gnt_list_entry, n,
&rinfo->grants, node) {
list_del(&gnt_list_entry->node);
- if (info->feature_persistent)
+ if (info->bounce)
__free_page(gnt_list_entry->page);
kfree(gnt_list_entry);
i--;
@@ -346,7 +356,7 @@ static struct grant *get_free_grant(struct blkfront_ring_info *rinfo)
node);
list_del(&gnt_list_entry->node);
- if (gnt_list_entry->gref != GRANT_INVALID_REF)
+ if (gnt_list_entry->gref != INVALID_GRANT_REF)
rinfo->persistent_gnts_c--;
return gnt_list_entry;
@@ -368,13 +378,13 @@ static struct grant *get_grant(grant_ref_t *gref_head,
struct grant *gnt_list_entry = get_free_grant(rinfo);
struct blkfront_info *info = rinfo->dev_info;
- if (gnt_list_entry->gref != GRANT_INVALID_REF)
+ if (gnt_list_entry->gref != INVALID_GRANT_REF)
return gnt_list_entry;
/* Assign a gref to this page */
gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head);
BUG_ON(gnt_list_entry->gref == -ENOSPC);
- if (info->feature_persistent)
+ if (info->bounce)
grant_foreign_access(gnt_list_entry, info);
else {
/* Grant access to the GFN passed by the caller */
@@ -392,13 +402,13 @@ static struct grant *get_indirect_grant(grant_ref_t *gref_head,
struct grant *gnt_list_entry = get_free_grant(rinfo);
struct blkfront_info *info = rinfo->dev_info;
- if (gnt_list_entry->gref != GRANT_INVALID_REF)
+ if (gnt_list_entry->gref != INVALID_GRANT_REF)
return gnt_list_entry;
/* Assign a gref to this page */
gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head);
BUG_ON(gnt_list_entry->gref == -ENOSPC);
- if (!info->feature_persistent) {
+ if (!info->bounce) {
struct page *indirect_page;
/* Fetch a pre-allocated page to use for indirect grefs */
@@ -483,11 +493,11 @@ static void blkif_restart_queue_callback(void *arg)
schedule_work(&rinfo->work);
}
-static int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg)
+static int blkif_getgeo(struct gendisk *disk, struct hd_geometry *hg)
{
/* We don't have real geometry info, but let's at least return
values consistent with the size of the device */
- sector_t nsect = get_capacity(bd->bd_disk);
+ sector_t nsect = get_capacity(disk);
sector_t cylinders = nsect;
hg->heads = 0xff;
@@ -499,37 +509,25 @@ static int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg)
return 0;
}
-static int blkif_ioctl(struct block_device *bdev, fmode_t mode,
+static int blkif_ioctl(struct block_device *bdev, blk_mode_t mode,
unsigned command, unsigned long argument)
{
struct blkfront_info *info = bdev->bd_disk->private_data;
int i;
- dev_dbg(&info->xbdev->dev, "command: 0x%x, argument: 0x%lx\n",
- command, (long)argument);
-
switch (command) {
case CDROMMULTISESSION:
- dev_dbg(&info->xbdev->dev, "FIXME: support multisession CDs later\n");
for (i = 0; i < sizeof(struct cdrom_multisession); i++)
if (put_user(0, (char __user *)(argument + i)))
return -EFAULT;
return 0;
-
- case CDROM_GET_CAPABILITY: {
- struct gendisk *gd = info->gd;
- if (gd->flags & GENHD_FL_CD)
- return 0;
- return -EINVAL;
- }
-
+ case CDROM_GET_CAPABILITY:
+ if (!(info->vdisk_info & VDISK_CDROM))
+ return -EINVAL;
+ return 0;
default:
- /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
- command);*/
- return -EINVAL; /* same return as native Linux */
+ return -EINVAL;
}
-
- return 0;
}
static unsigned long blkif_ring_get_request(struct blkfront_ring_info *rinfo,
@@ -543,10 +541,10 @@ static unsigned long blkif_ring_get_request(struct blkfront_ring_info *rinfo,
id = get_id_from_freelist(rinfo);
rinfo->shadow[id].request = req;
- rinfo->shadow[id].status = REQ_WAITING;
+ rinfo->shadow[id].status = REQ_PROCESSING;
rinfo->shadow[id].associated_id = NO_ASSOCIATED_ID;
- (*ring_req)->u.rw.id = id;
+ rinfo->shadow[id].req.u.rw.id = id;
return id;
}
@@ -554,11 +552,12 @@ static unsigned long blkif_ring_get_request(struct blkfront_ring_info *rinfo,
static int blkif_queue_discard_req(struct request *req, struct blkfront_ring_info *rinfo)
{
struct blkfront_info *info = rinfo->dev_info;
- struct blkif_request *ring_req;
+ struct blkif_request *ring_req, *final_ring_req;
unsigned long id;
/* Fill out a communications ring structure. */
- id = blkif_ring_get_request(rinfo, req, &ring_req);
+ id = blkif_ring_get_request(rinfo, req, &final_ring_req);
+ ring_req = &rinfo->shadow[id].req;
ring_req->operation = BLKIF_OP_DISCARD;
ring_req->u.discard.nr_sectors = blk_rq_sectors(req);
@@ -569,8 +568,9 @@ static int blkif_queue_discard_req(struct request *req, struct blkfront_ring_inf
else
ring_req->u.discard.flag = 0;
- /* Keep a private copy so we can reissue requests when recovering. */
- rinfo->shadow[id].req = *ring_req;
+ /* Copy the request to the ring page. */
+ *final_ring_req = *ring_req;
+ rinfo->shadow[id].status = REQ_WAITING;
return 0;
}
@@ -582,7 +582,7 @@ struct setup_rw_req {
struct blkif_request *ring_req;
grant_ref_t gref_head;
unsigned int id;
- /* Only used when persistent grant is used and it's a read request */
+ /* Only used when persistent grant is used and it's a write request */
bool need_copy;
unsigned int bvec_off;
char *bvec_data;
@@ -703,6 +703,7 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
{
struct blkfront_info *info = rinfo->dev_info;
struct blkif_request *ring_req, *extra_ring_req = NULL;
+ struct blkif_request *final_ring_req, *final_extra_ring_req = NULL;
unsigned long id, extra_id = NO_ASSOCIATED_ID;
bool require_extra_req = false;
int i;
@@ -710,7 +711,7 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
.grant_idx = 0,
.segments = NULL,
.rinfo = rinfo,
- .need_copy = rq_data_dir(req) && info->feature_persistent,
+ .need_copy = rq_data_dir(req) && info->bounce,
};
/*
@@ -747,9 +748,10 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
}
/* Fill out a communications ring structure. */
- id = blkif_ring_get_request(rinfo, req, &ring_req);
+ id = blkif_ring_get_request(rinfo, req, &final_ring_req);
+ ring_req = &rinfo->shadow[id].req;
- num_sg = blk_rq_map_sg(req->q, req, rinfo->shadow[id].sg);
+ num_sg = blk_rq_map_sg(req, rinfo->shadow[id].sg);
num_grant = 0;
/* Calculate the number of grant used */
for_each_sg(rinfo->shadow[id].sg, sg, num_sg, i)
@@ -778,13 +780,19 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
ring_req->u.rw.handle = info->handle;
ring_req->operation = rq_data_dir(req) ?
BLKIF_OP_WRITE : BLKIF_OP_READ;
- if (req_op(req) == REQ_OP_FLUSH || req->cmd_flags & REQ_FUA) {
+ if (req_op(req) == REQ_OP_FLUSH ||
+ (req_op(req) == REQ_OP_WRITE && (req->cmd_flags & REQ_FUA))) {
/*
* Ideally we can do an unordered flush-to-disk.
* In case the backend onlysupports barriers, use that.
* A barrier request a superset of FUA, so we can
* implement it the same way. (It's also a FLUSH+FUA,
* since it is guaranteed ordered WRT previous writes.)
+ *
+ * Note that can end up here with a FUA write and the
+ * flags cleared. This happens when the flag was
+ * run-time disabled after a failing I/O, and we'll
+ * simplify submit it as a normal write.
*/
if (info->feature_flush && info->feature_fua)
ring_req->operation =
@@ -792,13 +800,13 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
else if (info->feature_flush)
ring_req->operation =
BLKIF_OP_FLUSH_DISKCACHE;
- else
- ring_req->operation = 0;
}
ring_req->u.rw.nr_segments = num_grant;
if (unlikely(require_extra_req)) {
extra_id = blkif_ring_get_request(rinfo, req,
- &extra_ring_req);
+ &final_extra_ring_req);
+ extra_ring_req = &rinfo->shadow[extra_id].req;
+
/*
* Only the first request contains the scatter-gather
* list.
@@ -840,10 +848,13 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
if (setup.segments)
kunmap_atomic(setup.segments);
- /* Keep a private copy so we can reissue requests when recovering. */
- rinfo->shadow[id].req = *ring_req;
- if (unlikely(require_extra_req))
- rinfo->shadow[extra_id].req = *extra_ring_req;
+ /* Copy request(s) to the ring page. */
+ *final_ring_req = *ring_req;
+ rinfo->shadow[id].status = REQ_WAITING;
+ if (unlikely(require_extra_req)) {
+ *final_extra_ring_req = *extra_ring_req;
+ rinfo->shadow[extra_id].status = REQ_WAITING;
+ }
if (new_persistent_gnts)
gnttab_free_grant_references(setup.gref_head);
@@ -879,16 +890,6 @@ static inline void flush_requests(struct blkfront_ring_info *rinfo)
notify_remote_via_irq(rinfo->irq);
}
-static inline bool blkif_request_flush_invalid(struct request *req,
- struct blkfront_info *info)
-{
- return (blk_rq_is_passthrough(req) ||
- ((req_op(req) == REQ_OP_FLUSH) &&
- !info->feature_flush) ||
- ((req->cmd_flags & REQ_FUA) &&
- !info->feature_fua));
-}
-
static blk_status_t blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *qd)
{
@@ -900,12 +901,22 @@ static blk_status_t blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
rinfo = get_rinfo(info, qid);
blk_mq_start_request(qd->rq);
spin_lock_irqsave(&rinfo->ring_lock, flags);
- if (RING_FULL(&rinfo->ring))
- goto out_busy;
- if (blkif_request_flush_invalid(qd->rq, rinfo->dev_info))
- goto out_err;
+ /*
+ * Check if the backend actually supports flushes.
+ *
+ * While the block layer won't send us flushes if we don't claim to
+ * support them, the Xen protocol allows the backend to revoke support
+ * at any time. That is of course a really bad idea and dangerous, but
+ * has been allowed for 10+ years. In that case we simply clear the
+ * flags, and directly return here for an empty flush and ignore the
+ * FUA flag later on.
+ */
+ if (unlikely(req_op(qd->rq) == REQ_OP_FLUSH && !info->feature_flush))
+ goto complete;
+ if (RING_FULL(&rinfo->ring))
+ goto out_busy;
if (blkif_queue_request(qd->rq, rinfo))
goto out_busy;
@@ -913,14 +924,14 @@ static blk_status_t blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
spin_unlock_irqrestore(&rinfo->ring_lock, flags);
return BLK_STS_OK;
-out_err:
- spin_unlock_irqrestore(&rinfo->ring_lock, flags);
- return BLK_STS_IOERR;
-
out_busy:
blk_mq_stop_hw_queue(hctx);
spin_unlock_irqrestore(&rinfo->ring_lock, flags);
return BLK_STS_DEV_RESOURCE;
+complete:
+ spin_unlock_irqrestore(&rinfo->ring_lock, flags);
+ blk_mq_end_request(qd->rq, BLK_STS_OK);
+ return BLK_STS_OK;
}
static void blkif_complete_rq(struct request *rq)
@@ -933,39 +944,41 @@ static const struct blk_mq_ops blkfront_mq_ops = {
.complete = blkif_complete_rq,
};
-static void blkif_set_queue_limits(struct blkfront_info *info)
+static void blkif_set_queue_limits(const struct blkfront_info *info,
+ struct queue_limits *lim)
{
- struct request_queue *rq = info->rq;
- struct gendisk *gd = info->gd;
unsigned int segments = info->max_indirect_segments ? :
BLKIF_MAX_SEGMENTS_PER_REQUEST;
- blk_queue_flag_set(QUEUE_FLAG_VIRT, rq);
-
if (info->feature_discard) {
- blk_queue_flag_set(QUEUE_FLAG_DISCARD, rq);
- blk_queue_max_discard_sectors(rq, get_capacity(gd));
- rq->limits.discard_granularity = info->discard_granularity ?:
- info->physical_sector_size;
- rq->limits.discard_alignment = info->discard_alignment;
+ lim->max_hw_discard_sectors = UINT_MAX;
+ if (info->discard_granularity)
+ lim->discard_granularity = info->discard_granularity;
+ lim->discard_alignment = info->discard_alignment;
if (info->feature_secdiscard)
- blk_queue_flag_set(QUEUE_FLAG_SECERASE, rq);
+ lim->max_secure_erase_sectors = UINT_MAX;
+ }
+
+ if (info->feature_flush) {
+ lim->features |= BLK_FEAT_WRITE_CACHE;
+ if (info->feature_fua)
+ lim->features |= BLK_FEAT_FUA;
}
/* Hard sector size and max sectors impersonate the equiv. hardware. */
- blk_queue_logical_block_size(rq, info->sector_size);
- blk_queue_physical_block_size(rq, info->physical_sector_size);
- blk_queue_max_hw_sectors(rq, (segments * XEN_PAGE_SIZE) / 512);
+ lim->logical_block_size = info->sector_size;
+ lim->physical_block_size = info->physical_sector_size;
+ lim->max_hw_sectors = (segments * XEN_PAGE_SIZE) / 512;
/* Each segment in a request is up to an aligned page in size. */
- blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
- blk_queue_max_segment_size(rq, PAGE_SIZE);
+ lim->seg_boundary_mask = PAGE_SIZE - 1;
+ lim->max_segment_size = PAGE_SIZE;
/* Ensure a merged request will fit in a single I/O ring slot. */
- blk_queue_max_segments(rq, segments / GRANTS_PER_PSEG);
+ lim->max_segments = segments / GRANTS_PER_PSEG;
/* Make sure buffer addresses are sector-aligned. */
- blk_queue_dma_alignment(rq, 511);
+ lim->dma_alignment = 511;
}
static const char *flush_info(struct blkfront_info *info)
@@ -980,13 +993,12 @@ static const char *flush_info(struct blkfront_info *info)
static void xlvbd_flush(struct blkfront_info *info)
{
- blk_queue_write_cache(info->rq, info->feature_flush ? true : false,
- info->feature_fua ? true : false);
- pr_info("blkfront: %s: %s %s %s %s %s\n",
+ pr_info("blkfront: %s: %s %s %s %s %s %s %s\n",
info->gd->disk_name, flush_info(info),
"persistent grants:", info->feature_persistent ?
"enabled;" : "disabled;", "indirect descriptors:",
- info->max_indirect_segments ? "enabled;" : "disabled;");
+ info->max_indirect_segments ? "enabled;" : "disabled;",
+ "bounce buffer:", info->bounce ? "enabled" : "disabled;");
}
static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
@@ -1058,10 +1070,9 @@ static char *encode_disk_name(char *ptr, unsigned int n)
}
static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
- struct blkfront_info *info,
- u16 vdisk_info, u16 sector_size,
- unsigned int physical_sector_size)
+ struct blkfront_info *info)
{
+ struct queue_limits lim = {};
struct gendisk *gd;
int nr_minors = 1;
int err;
@@ -1105,7 +1116,6 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
err = xlbd_reserve_minors(minor, nr_minors);
if (err)
return err;
- err = -ENODEV;
memset(&info->tag_set, 0, sizeof(info->tag_set));
info->tag_set.ops = &blkfront_mq_ops;
@@ -1121,7 +1131,6 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
} else
info->tag_set.queue_depth = BLK_RING_SIZE(info);
info->tag_set.numa_node = NUMA_NO_NODE;
- info->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
info->tag_set.cmd_size = sizeof(struct blkif_req);
info->tag_set.driver_data = info;
@@ -1129,7 +1138,8 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
if (err)
goto out_release_minors;
- gd = blk_mq_alloc_disk(&info->tag_set, info);
+ blkif_set_queue_limits(info, &lim);
+ gd = blk_mq_alloc_disk(&info->tag_set, &lim, info);
if (IS_ERR(gd)) {
err = PTR_ERR(gd);
goto out_free_tag_set;
@@ -1153,21 +1163,14 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
info->rq = gd->queue;
info->gd = gd;
- info->sector_size = sector_size;
- info->physical_sector_size = physical_sector_size;
- blkif_set_queue_limits(info);
xlvbd_flush(info);
- if (vdisk_info & VDISK_READONLY)
+ if (info->vdisk_info & VDISK_READONLY)
set_disk_ro(gd, 1);
-
- if (vdisk_info & VDISK_REMOVABLE)
+ if (info->vdisk_info & VDISK_REMOVABLE)
gd->flags |= GENHD_FL_REMOVABLE;
- if (vdisk_info & VDISK_CDROM)
- gd->flags |= GENHD_FL_CD;
-
return 0;
out_free_tag_set:
@@ -1177,36 +1180,6 @@ out_release_minors:
return err;
}
-static void xlvbd_release_gendisk(struct blkfront_info *info)
-{
- unsigned int minor, nr_minors, i;
- struct blkfront_ring_info *rinfo;
-
- if (info->rq == NULL)
- return;
-
- /* No more blkif_request(). */
- blk_mq_stop_hw_queues(info->rq);
-
- for_each_rinfo(info, rinfo, i) {
- /* No more gnttab callback work. */
- gnttab_cancel_free_callback(&rinfo->callback);
-
- /* Flush gnttab callback work. Must be done with no locks held. */
- flush_work(&rinfo->work);
- }
-
- del_gendisk(info->gd);
-
- minor = info->gd->first_minor;
- nr_minors = info->gd->minors;
- xlbd_release_minors(minor, nr_minors);
-
- blk_cleanup_disk(info->gd);
- info->gd = NULL;
- blk_mq_free_tag_set(&info->tag_set);
-}
-
/* Already hold rinfo->ring_lock. */
static inline void kick_pending_request_queues_locked(struct blkfront_ring_info *rinfo)
{
@@ -1244,7 +1217,7 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo)
if (!list_empty(&rinfo->indirect_pages)) {
struct page *indirect_page, *n;
- BUG_ON(info->feature_persistent);
+ BUG_ON(info->bounce);
list_for_each_entry_safe(indirect_page, n, &rinfo->indirect_pages, lru) {
list_del(&indirect_page->lru);
__free_page(indirect_page);
@@ -1256,12 +1229,12 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo)
list_for_each_entry_safe(persistent_gnt, n,
&rinfo->grants, node) {
list_del(&persistent_gnt->node);
- if (persistent_gnt->gref != GRANT_INVALID_REF) {
+ if (persistent_gnt->gref != INVALID_GRANT_REF) {
gnttab_end_foreign_access(persistent_gnt->gref,
- 0, 0UL);
+ NULL);
rinfo->persistent_gnts_c--;
}
- if (info->feature_persistent)
+ if (info->bounce)
__free_page(persistent_gnt->page);
kfree(persistent_gnt);
}
@@ -1281,8 +1254,8 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo)
rinfo->shadow[i].req.u.rw.nr_segments;
for (j = 0; j < segs; j++) {
persistent_gnt = rinfo->shadow[i].grants_used[j];
- gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
- if (info->feature_persistent)
+ gnttab_end_foreign_access(persistent_gnt->gref, NULL);
+ if (info->bounce)
__free_page(persistent_gnt->page);
kfree(persistent_gnt);
}
@@ -1296,7 +1269,7 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo)
for (j = 0; j < INDIRECT_GREFS(segs); j++) {
persistent_gnt = rinfo->shadow[i].indirect_grants[j];
- gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
+ gnttab_end_foreign_access(persistent_gnt->gref, NULL);
__free_page(persistent_gnt->page);
kfree(persistent_gnt);
}
@@ -1317,14 +1290,8 @@ free_shadow:
flush_work(&rinfo->work);
/* Free resources associated with old device channel. */
- for (i = 0; i < info->nr_ring_pages; i++) {
- if (rinfo->ring_ref[i] != GRANT_INVALID_REF) {
- gnttab_end_foreign_access(rinfo->ring_ref[i], 0, 0);
- rinfo->ring_ref[i] = GRANT_INVALID_REF;
- }
- }
- free_pages((unsigned long)rinfo->ring.sring, get_order(info->nr_ring_pages * XEN_PAGE_SIZE));
- rinfo->ring.sring = NULL;
+ xenbus_teardown_ring((void **)&rinfo->ring.sring, info->nr_ring_pages,
+ rinfo->ring_ref);
if (rinfo->irq)
unbind_from_irqhandler(rinfo->irq, rinfo);
@@ -1397,8 +1364,8 @@ static enum blk_req_status blkif_rsp_to_req_status(int rsp)
static int blkif_get_final_status(enum blk_req_status s1,
enum blk_req_status s2)
{
- BUG_ON(s1 == REQ_WAITING);
- BUG_ON(s2 == REQ_WAITING);
+ BUG_ON(s1 < REQ_DONE);
+ BUG_ON(s2 < REQ_DONE);
if (s1 == REQ_ERROR || s2 == REQ_ERROR)
return BLKIF_RSP_ERROR;
@@ -1407,9 +1374,15 @@ static int blkif_get_final_status(enum blk_req_status s1,
return BLKIF_RSP_OKAY;
}
-static bool blkif_completion(unsigned long *id,
- struct blkfront_ring_info *rinfo,
- struct blkif_response *bret)
+/*
+ * Return values:
+ * 1 response processed.
+ * 0 missing further responses.
+ * -1 error while processing.
+ */
+static int blkif_completion(unsigned long *id,
+ struct blkfront_ring_info *rinfo,
+ struct blkif_response *bret)
{
int i = 0;
struct scatterlist *sg;
@@ -1431,8 +1404,8 @@ static bool blkif_completion(unsigned long *id,
s->status = blkif_rsp_to_req_status(bret->status);
/* Wait the second response if not yet here. */
- if (s2->status == REQ_WAITING)
- return false;
+ if (s2->status < REQ_DONE)
+ return 0;
bret->status = blkif_get_final_status(s->status,
s2->status);
@@ -1465,7 +1438,7 @@ static bool blkif_completion(unsigned long *id,
data.s = s;
num_sg = s->num_sg;
- if (bret->operation == BLKIF_OP_READ && info->feature_persistent) {
+ if (bret->operation == BLKIF_OP_READ && info->bounce) {
for_each_sg(s->sg, sg, num_sg, i) {
BUG_ON(sg->offset + sg->length > PAGE_SIZE);
@@ -1483,139 +1456,172 @@ static bool blkif_completion(unsigned long *id,
}
/* Add the persistent grant into the list of free grants */
for (i = 0; i < num_grant; i++) {
- if (gnttab_query_foreign_access(s->grants_used[i]->gref)) {
+ if (!gnttab_try_end_foreign_access(s->grants_used[i]->gref)) {
/*
* If the grant is still mapped by the backend (the
* backend has chosen to make this grant persistent)
* we add it at the head of the list, so it will be
* reused first.
*/
- if (!info->feature_persistent)
- pr_alert_ratelimited("backed has not unmapped grant: %u\n",
- s->grants_used[i]->gref);
+ if (!info->feature_persistent) {
+ pr_alert("backed has not unmapped grant: %u\n",
+ s->grants_used[i]->gref);
+ return -1;
+ }
list_add(&s->grants_used[i]->node, &rinfo->grants);
rinfo->persistent_gnts_c++;
} else {
/*
- * If the grant is not mapped by the backend we end the
- * foreign access and add it to the tail of the list,
- * so it will not be picked again unless we run out of
- * persistent grants.
+ * If the grant is not mapped by the backend we add it
+ * to the tail of the list, so it will not be picked
+ * again unless we run out of persistent grants.
*/
- gnttab_end_foreign_access(s->grants_used[i]->gref, 0, 0UL);
- s->grants_used[i]->gref = GRANT_INVALID_REF;
+ s->grants_used[i]->gref = INVALID_GRANT_REF;
list_add_tail(&s->grants_used[i]->node, &rinfo->grants);
}
}
if (s->req.operation == BLKIF_OP_INDIRECT) {
for (i = 0; i < INDIRECT_GREFS(num_grant); i++) {
- if (gnttab_query_foreign_access(s->indirect_grants[i]->gref)) {
- if (!info->feature_persistent)
- pr_alert_ratelimited("backed has not unmapped grant: %u\n",
- s->indirect_grants[i]->gref);
+ if (!gnttab_try_end_foreign_access(s->indirect_grants[i]->gref)) {
+ if (!info->feature_persistent) {
+ pr_alert("backed has not unmapped grant: %u\n",
+ s->indirect_grants[i]->gref);
+ return -1;
+ }
list_add(&s->indirect_grants[i]->node, &rinfo->grants);
rinfo->persistent_gnts_c++;
} else {
struct page *indirect_page;
- gnttab_end_foreign_access(s->indirect_grants[i]->gref, 0, 0UL);
/*
* Add the used indirect page back to the list of
* available pages for indirect grefs.
*/
- if (!info->feature_persistent) {
+ if (!info->bounce) {
indirect_page = s->indirect_grants[i]->page;
list_add(&indirect_page->lru, &rinfo->indirect_pages);
}
- s->indirect_grants[i]->gref = GRANT_INVALID_REF;
+ s->indirect_grants[i]->gref = INVALID_GRANT_REF;
list_add_tail(&s->indirect_grants[i]->node, &rinfo->grants);
}
}
}
- return true;
+ return 1;
}
static irqreturn_t blkif_interrupt(int irq, void *dev_id)
{
struct request *req;
- struct blkif_response *bret;
+ struct blkif_response bret;
RING_IDX i, rp;
unsigned long flags;
struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)dev_id;
struct blkfront_info *info = rinfo->dev_info;
+ unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS;
- if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
+ if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
+ xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS);
return IRQ_HANDLED;
+ }
spin_lock_irqsave(&rinfo->ring_lock, flags);
again:
- rp = rinfo->ring.sring->rsp_prod;
- rmb(); /* Ensure we see queued responses up to 'rp'. */
+ rp = READ_ONCE(rinfo->ring.sring->rsp_prod);
+ virt_rmb(); /* Ensure we see queued responses up to 'rp'. */
+ if (RING_RESPONSE_PROD_OVERFLOW(&rinfo->ring, rp)) {
+ pr_alert("%s: illegal number of responses %u\n",
+ info->gd->disk_name, rp - rinfo->ring.rsp_cons);
+ goto err;
+ }
for (i = rinfo->ring.rsp_cons; i != rp; i++) {
unsigned long id;
+ unsigned int op;
+
+ eoiflag = 0;
+
+ RING_COPY_RESPONSE(&rinfo->ring, i, &bret);
+ id = bret.id;
- bret = RING_GET_RESPONSE(&rinfo->ring, i);
- id = bret->id;
/*
* The backend has messed up and given us an id that we would
* never have given to it (we stamp it up to BLK_RING_SIZE -
* look in get_id_from_freelist.
*/
if (id >= BLK_RING_SIZE(info)) {
- WARN(1, "%s: response to %s has incorrect id (%ld)\n",
- info->gd->disk_name, op_name(bret->operation), id);
- /* We can't safely get the 'struct request' as
- * the id is busted. */
- continue;
+ pr_alert("%s: response has incorrect id (%ld)\n",
+ info->gd->disk_name, id);
+ goto err;
+ }
+ if (rinfo->shadow[id].status != REQ_WAITING) {
+ pr_alert("%s: response references no pending request\n",
+ info->gd->disk_name);
+ goto err;
}
+
+ rinfo->shadow[id].status = REQ_PROCESSING;
req = rinfo->shadow[id].request;
- if (bret->operation != BLKIF_OP_DISCARD) {
+ op = rinfo->shadow[id].req.operation;
+ if (op == BLKIF_OP_INDIRECT)
+ op = rinfo->shadow[id].req.u.indirect.indirect_op;
+ if (bret.operation != op) {
+ pr_alert("%s: response has wrong operation (%u instead of %u)\n",
+ info->gd->disk_name, bret.operation, op);
+ goto err;
+ }
+
+ if (bret.operation != BLKIF_OP_DISCARD) {
+ int ret;
+
/*
* We may need to wait for an extra response if the
* I/O request is split in 2
*/
- if (!blkif_completion(&id, rinfo, bret))
+ ret = blkif_completion(&id, rinfo, &bret);
+ if (!ret)
continue;
+ if (unlikely(ret < 0))
+ goto err;
}
if (add_id_to_freelist(rinfo, id)) {
WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n",
- info->gd->disk_name, op_name(bret->operation), id);
+ info->gd->disk_name, op_name(bret.operation), id);
continue;
}
- if (bret->status == BLKIF_RSP_OKAY)
+ if (bret.status == BLKIF_RSP_OKAY)
blkif_req(req)->error = BLK_STS_OK;
else
blkif_req(req)->error = BLK_STS_IOERR;
- switch (bret->operation) {
+ switch (bret.operation) {
case BLKIF_OP_DISCARD:
- if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
+ if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) {
struct request_queue *rq = info->rq;
- printk(KERN_WARNING "blkfront: %s: %s op failed\n",
- info->gd->disk_name, op_name(bret->operation));
+
+ pr_warn_ratelimited("blkfront: %s: %s op failed\n",
+ info->gd->disk_name, op_name(bret.operation));
blkif_req(req)->error = BLK_STS_NOTSUPP;
info->feature_discard = 0;
info->feature_secdiscard = 0;
- blk_queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
- blk_queue_flag_clear(QUEUE_FLAG_SECERASE, rq);
+ blk_queue_disable_discard(rq);
+ blk_queue_disable_secure_erase(rq);
}
break;
case BLKIF_OP_FLUSH_DISKCACHE:
case BLKIF_OP_WRITE_BARRIER:
- if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
- printk(KERN_WARNING "blkfront: %s: %s op failed\n",
- info->gd->disk_name, op_name(bret->operation));
+ if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) {
+ pr_warn_ratelimited("blkfront: %s: %s op failed\n",
+ info->gd->disk_name, op_name(bret.operation));
blkif_req(req)->error = BLK_STS_NOTSUPP;
}
- if (unlikely(bret->status == BLKIF_RSP_ERROR &&
+ if (unlikely(bret.status == BLKIF_RSP_ERROR &&
rinfo->shadow[id].req.u.rw.nr_segments == 0)) {
- printk(KERN_WARNING "blkfront: %s: empty %s op failed\n",
- info->gd->disk_name, op_name(bret->operation));
+ pr_warn_ratelimited("blkfront: %s: empty %s op failed\n",
+ info->gd->disk_name, op_name(bret.operation));
blkif_req(req)->error = BLK_STS_NOTSUPP;
}
if (unlikely(blkif_req(req)->error)) {
@@ -1623,14 +1629,14 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
blkif_req(req)->error = BLK_STS_OK;
info->feature_fua = 0;
info->feature_flush = 0;
- xlvbd_flush(info);
}
fallthrough;
case BLKIF_OP_READ:
case BLKIF_OP_WRITE:
- if (unlikely(bret->status != BLKIF_RSP_OKAY))
- dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
- "request: %x\n", bret->status);
+ if (unlikely(bret.status != BLKIF_RSP_OKAY))
+ dev_dbg_ratelimited(&info->xbdev->dev,
+ "Bad return from blkdev data request: %#x\n",
+ bret.status);
break;
default:
@@ -1655,6 +1661,18 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
spin_unlock_irqrestore(&rinfo->ring_lock, flags);
+ xen_irq_lateeoi(irq, eoiflag);
+
+ return IRQ_HANDLED;
+
+ err:
+ info->connected = BLKIF_STATE_ERROR;
+
+ spin_unlock_irqrestore(&rinfo->ring_lock, flags);
+
+ /* No EOI in order to avoid further interrupts. */
+
+ pr_alert("%s disabled for further use\n", info->gd->disk_name);
return IRQ_HANDLED;
}
@@ -1663,38 +1681,23 @@ static int setup_blkring(struct xenbus_device *dev,
struct blkfront_ring_info *rinfo)
{
struct blkif_sring *sring;
- int err, i;
+ int err;
struct blkfront_info *info = rinfo->dev_info;
unsigned long ring_size = info->nr_ring_pages * XEN_PAGE_SIZE;
- grant_ref_t gref[XENBUS_MAX_RING_GRANTS];
-
- for (i = 0; i < info->nr_ring_pages; i++)
- rinfo->ring_ref[i] = GRANT_INVALID_REF;
-
- sring = (struct blkif_sring *)__get_free_pages(GFP_NOIO | __GFP_HIGH,
- get_order(ring_size));
- if (!sring) {
- xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
- return -ENOMEM;
- }
- SHARED_RING_INIT(sring);
- FRONT_RING_INIT(&rinfo->ring, sring, ring_size);
- err = xenbus_grant_ring(dev, rinfo->ring.sring, info->nr_ring_pages, gref);
- if (err < 0) {
- free_pages((unsigned long)sring, get_order(ring_size));
- rinfo->ring.sring = NULL;
+ err = xenbus_setup_ring(dev, GFP_NOIO, (void **)&sring,
+ info->nr_ring_pages, rinfo->ring_ref);
+ if (err)
goto fail;
- }
- for (i = 0; i < info->nr_ring_pages; i++)
- rinfo->ring_ref[i] = gref[i];
+
+ XEN_FRONT_RING_INIT(&rinfo->ring, sring, ring_size);
err = xenbus_alloc_evtchn(dev, &rinfo->evtchn);
if (err)
goto fail;
- err = bind_evtchn_to_irqhandler(rinfo->evtchn, blkif_interrupt, 0,
- "blkif", rinfo);
+ err = bind_evtchn_to_irqhandler_lateeoi(rinfo->evtchn, blkif_interrupt,
+ 0, "blkif", rinfo);
if (err <= 0) {
xenbus_dev_fatal(dev, err,
"bind_evtchn_to_irqhandler failed");
@@ -1756,11 +1759,11 @@ abort_transaction:
return err;
}
-static void free_info(struct blkfront_info *info)
-{
- list_del(&info->info_list);
- kfree(info);
-}
+/* Enable the persistent grants feature. */
+static bool feature_persistent = true;
+module_param(feature_persistent, bool, 0644);
+MODULE_PARM_DESC(feature_persistent,
+ "Enables the persistent grants feature");
/* Common code used when first setting up, and when resuming. */
static int talk_to_blkback(struct xenbus_device *dev,
@@ -1776,6 +1779,10 @@ static int talk_to_blkback(struct xenbus_device *dev,
if (!info)
return -ENODEV;
+ /* Check if backend is trusted. */
+ info->bounce = !xen_blkif_trusted ||
+ !xenbus_read_unsigned(dev->nodename, "trusted", 1);
+
max_page_order = xenbus_read_unsigned(info->xbdev->otherend,
"max-ring-page-order", 0);
ring_page_order = min(xen_blkif_max_ring_order, max_page_order);
@@ -1849,8 +1856,9 @@ again:
message = "writing protocol";
goto abort_transaction;
}
+ info->feature_persistent_parm = feature_persistent;
err = xenbus_printf(xbt, dev->nodename, "feature-persistent", "%u",
- info->feature_persistent);
+ info->feature_persistent_parm);
if (err)
dev_warn(&dev->dev,
"writing persistent grants feature to xenbus");
@@ -1880,13 +1888,6 @@ again:
xenbus_dev_fatal(dev, err, "%s", message);
destroy_blkring:
blkif_free(info, 0);
-
- mutex_lock(&blkfront_mutex);
- free_info(info);
- mutex_unlock(&blkfront_mutex);
-
- dev_set_drvdata(&dev->dev, NULL);
-
return err;
}
@@ -1925,12 +1926,6 @@ static int negotiate_mq(struct blkfront_info *info)
return 0;
}
-/* Enable the persistent grants feature. */
-static bool feature_persistent = true;
-module_param(feature_persistent, bool, 0644);
-MODULE_PARM_DESC(feature_persistent,
- "Enables the persistent grants feature");
-
/*
* Entry point to this code when a new device is created. Allocate the basic
* structures and the ring buffer for communication with the backend, and
@@ -1997,8 +1992,6 @@ static int blkfront_probe(struct xenbus_device *dev,
info->vdevice = vdevice;
info->connected = BLKIF_STATE_DISCONNECTED;
- info->feature_persistent = feature_persistent;
-
/* Front end dir is a number, which is used as the id. */
info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
dev_set_drvdata(&dev->dev, info);
@@ -2012,18 +2005,19 @@ static int blkfront_probe(struct xenbus_device *dev,
static int blkif_recover(struct blkfront_info *info)
{
+ struct queue_limits lim;
unsigned int r_index;
struct request *req, *n;
int rc;
struct bio *bio;
- unsigned int segs;
struct blkfront_ring_info *rinfo;
+ lim = queue_limits_start_update(info->rq);
blkfront_gather_backend_features(info);
- /* Reset limits changed by blk_mq_update_nr_hw_queues(). */
- blkif_set_queue_limits(info);
- segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST;
- blk_queue_max_segments(info->rq, segs / GRANTS_PER_PSEG);
+ blkif_set_queue_limits(info, &lim);
+ rc = queue_limits_commit_update(info->rq, &lim);
+ if (rc)
+ return rc;
for_each_rinfo(info, rinfo, r_index) {
rc = blkfront_setup_indirect(rinfo);
@@ -2043,7 +2037,9 @@ static int blkif_recover(struct blkfront_info *info)
list_for_each_entry_safe(req, n, &info->requests, queuelist) {
/* Requeue pending requests (flush or discard) */
list_del_init(&req->queuelist);
- BUG_ON(req->nr_phys_segments > segs);
+ BUG_ON(req->nr_phys_segments >
+ (info->max_indirect_segments ? :
+ BLKIF_MAX_SEGMENTS_PER_REQUEST));
blk_mq_requeue_request(req, false);
}
blk_mq_start_stopped_hw_queues(info->rq, true);
@@ -2126,38 +2122,27 @@ static int blkfront_resume(struct xenbus_device *dev)
static void blkfront_closing(struct blkfront_info *info)
{
struct xenbus_device *xbdev = info->xbdev;
- struct block_device *bdev = NULL;
-
- mutex_lock(&info->mutex);
+ struct blkfront_ring_info *rinfo;
+ unsigned int i;
- if (xbdev->state == XenbusStateClosing) {
- mutex_unlock(&info->mutex);
+ if (xbdev->state == XenbusStateClosing)
return;
- }
-
- if (info->gd)
- bdev = bdgrab(info->gd->part0);
-
- mutex_unlock(&info->mutex);
- if (!bdev) {
- xenbus_frontend_closed(xbdev);
- return;
+ /* No more blkif_request(). */
+ if (info->rq && info->gd) {
+ blk_mq_stop_hw_queues(info->rq);
+ blk_mark_disk_dead(info->gd);
}
- mutex_lock(&bdev->bd_disk->open_mutex);
+ for_each_rinfo(info, rinfo, i) {
+ /* No more gnttab callback work. */
+ gnttab_cancel_free_callback(&rinfo->callback);
- if (bdev->bd_openers) {
- xenbus_dev_error(xbdev, -EBUSY,
- "Device in use; refusing to close");
- xenbus_switch_state(xbdev, XenbusStateClosing);
- } else {
- xlvbd_release_gendisk(info);
- xenbus_frontend_closed(xbdev);
+ /* Flush gnttab callback work. Must be done with no locks held. */
+ flush_work(&rinfo->work);
}
- mutex_unlock(&bdev->bd_disk->open_mutex);
- bdput(bdev);
+ xenbus_frontend_closed(xbdev);
}
static void blkfront_setup_discard(struct blkfront_info *info)
@@ -2202,17 +2187,18 @@ static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo)
if (err)
goto out_of_memory;
- if (!info->feature_persistent && info->max_indirect_segments) {
+ if (!info->bounce && info->max_indirect_segments) {
/*
- * We are using indirect descriptors but not persistent
- * grants, we need to allocate a set of pages that can be
+ * We are using indirect descriptors but don't have a bounce
+ * buffer, we need to allocate a set of pages that can be
* used for mapping indirect grefs
*/
int num = INDIRECT_GREFS(grants) * BLK_RING_SIZE(info);
BUG_ON(!list_empty(&rinfo->indirect_pages));
for (i = 0; i < num; i++) {
- struct page *indirect_page = alloc_page(GFP_KERNEL);
+ struct page *indirect_page = alloc_page(GFP_KERNEL |
+ __GFP_ZERO);
if (!indirect_page)
goto out_of_memory;
list_add(&indirect_page->lru, &rinfo->indirect_pages);
@@ -2301,10 +2287,12 @@ static void blkfront_gather_backend_features(struct blkfront_info *info)
if (xenbus_read_unsigned(info->xbdev->otherend, "feature-discard", 0))
blkfront_setup_discard(info);
- if (info->feature_persistent)
+ if (info->feature_persistent_parm)
info->feature_persistent =
!!xenbus_read_unsigned(info->xbdev->otherend,
"feature-persistent", 0);
+ if (info->feature_persistent)
+ info->bounce = true;
indirect_segments = xenbus_read_unsigned(info->xbdev->otherend,
"feature-max-indirect-segments", 0);
@@ -2328,9 +2316,6 @@ static void blkfront_gather_backend_features(struct blkfront_info *info)
static void blkfront_connect(struct blkfront_info *info)
{
unsigned long long sectors;
- unsigned long sector_size;
- unsigned int physical_sector_size;
- unsigned int binfo;
int err, i;
struct blkfront_ring_info *rinfo;
@@ -2368,8 +2353,8 @@ static void blkfront_connect(struct blkfront_info *info)
err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
"sectors", "%llu", &sectors,
- "info", "%u", &binfo,
- "sector-size", "%lu", &sector_size,
+ "info", "%u", &info->vdisk_info,
+ "sector-size", "%lu", &info->sector_size,
NULL);
if (err) {
xenbus_dev_fatal(info->xbdev, err,
@@ -2383,9 +2368,9 @@ static void blkfront_connect(struct blkfront_info *info)
* provide this. Assume physical sector size to be the same as
* sector_size in that case.
*/
- physical_sector_size = xenbus_read_unsigned(info->xbdev->otherend,
+ info->physical_sector_size = xenbus_read_unsigned(info->xbdev->otherend,
"physical-sector-size",
- sector_size);
+ info->sector_size);
blkfront_gather_backend_features(info);
for_each_rinfo(info, rinfo, i) {
err = blkfront_setup_indirect(rinfo);
@@ -2397,8 +2382,7 @@ static void blkfront_connect(struct blkfront_info *info)
}
}
- err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size,
- physical_sector_size);
+ err = xlvbd_alloc_gendisk(sectors, info);
if (err) {
xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
info->xbdev->otherend);
@@ -2412,7 +2396,13 @@ static void blkfront_connect(struct blkfront_info *info)
for_each_rinfo(info, rinfo, i)
kick_pending_request_queues(rinfo);
- device_add_disk(&info->xbdev->dev, info->gd, NULL);
+ err = device_add_disk(&info->xbdev->dev, info->gd, NULL);
+ if (err) {
+ put_disk(info->gd);
+ blk_mq_free_tag_set(&info->tag_set);
+ info->rq = NULL;
+ goto fail;
+ }
info->is_ready = 1;
return;
@@ -2472,66 +2462,32 @@ static void blkback_changed(struct xenbus_device *dev,
break;
fallthrough;
case XenbusStateClosing:
- if (info)
- blkfront_closing(info);
+ blkfront_closing(info);
break;
}
}
-static int blkfront_remove(struct xenbus_device *xbdev)
+static void blkfront_remove(struct xenbus_device *xbdev)
{
struct blkfront_info *info = dev_get_drvdata(&xbdev->dev);
- struct block_device *bdev = NULL;
- struct gendisk *disk;
dev_dbg(&xbdev->dev, "%s removed", xbdev->nodename);
- if (!info)
- return 0;
-
- blkif_free(info, 0);
-
- mutex_lock(&info->mutex);
-
- disk = info->gd;
- if (disk)
- bdev = bdgrab(disk->part0);
-
- info->xbdev = NULL;
- mutex_unlock(&info->mutex);
-
- if (!bdev) {
- mutex_lock(&blkfront_mutex);
- free_info(info);
- mutex_unlock(&blkfront_mutex);
- return 0;
- }
-
- /*
- * The xbdev was removed before we reached the Closed
- * state. See if it's safe to remove the disk. If the bdev
- * isn't closed yet, we let release take care of it.
- */
-
- mutex_lock(&disk->open_mutex);
- info = disk->private_data;
+ if (info->gd)
+ del_gendisk(info->gd);
- dev_warn(disk_to_dev(disk),
- "%s was hot-unplugged, %d stale handles\n",
- xbdev->nodename, bdev->bd_openers);
+ mutex_lock(&blkfront_mutex);
+ list_del(&info->info_list);
+ mutex_unlock(&blkfront_mutex);
- if (info && !bdev->bd_openers) {
- xlvbd_release_gendisk(info);
- disk->private_data = NULL;
- mutex_lock(&blkfront_mutex);
- free_info(info);
- mutex_unlock(&blkfront_mutex);
+ blkif_free(info, 0);
+ if (info->gd) {
+ xlbd_release_minors(info->gd->first_minor, info->gd->minors);
+ put_disk(info->gd);
+ blk_mq_free_tag_set(&info->tag_set);
}
- mutex_unlock(&disk->open_mutex);
- bdput(bdev);
-
- return 0;
+ kfree(info);
}
static int blkfront_is_ready(struct xenbus_device *dev)
@@ -2541,77 +2497,9 @@ static int blkfront_is_ready(struct xenbus_device *dev)
return info->is_ready && info->xbdev;
}
-static int blkif_open(struct block_device *bdev, fmode_t mode)
-{
- struct gendisk *disk = bdev->bd_disk;
- struct blkfront_info *info;
- int err = 0;
-
- mutex_lock(&blkfront_mutex);
-
- info = disk->private_data;
- if (!info) {
- /* xbdev gone */
- err = -ERESTARTSYS;
- goto out;
- }
-
- mutex_lock(&info->mutex);
-
- if (!info->gd)
- /* xbdev is closed */
- err = -ERESTARTSYS;
-
- mutex_unlock(&info->mutex);
-
-out:
- mutex_unlock(&blkfront_mutex);
- return err;
-}
-
-static void blkif_release(struct gendisk *disk, fmode_t mode)
-{
- struct blkfront_info *info = disk->private_data;
- struct xenbus_device *xbdev;
-
- mutex_lock(&blkfront_mutex);
- if (disk->part0->bd_openers)
- goto out_mutex;
-
- /*
- * Check if we have been instructed to close. We will have
- * deferred this request, because the bdev was still open.
- */
-
- mutex_lock(&info->mutex);
- xbdev = info->xbdev;
-
- if (xbdev && xbdev->state == XenbusStateClosing) {
- /* pending switch to state closed */
- dev_info(disk_to_dev(disk), "releasing disk\n");
- xlvbd_release_gendisk(info);
- xenbus_frontend_closed(info->xbdev);
- }
-
- mutex_unlock(&info->mutex);
-
- if (!xbdev) {
- /* sudden device removal */
- dev_info(disk_to_dev(disk), "releasing disk\n");
- xlvbd_release_gendisk(info);
- disk->private_data = NULL;
- free_info(info);
- }
-
-out_mutex:
- mutex_unlock(&blkfront_mutex);
-}
-
static const struct block_device_operations xlvbd_block_fops =
{
.owner = THIS_MODULE,
- .open = blkif_open,
- .release = blkif_release,
.getgeo = blkif_getgeo,
.ioctl = blkif_ioctl,
.compat_ioctl = blkdev_compat_ptr_ioctl,
@@ -2640,6 +2528,7 @@ static void purge_persistent_grants(struct blkfront_info *info)
for_each_rinfo(info, rinfo, i) {
struct grant *gnt_list_entry, *tmp;
+ LIST_HEAD(grants);
spin_lock_irqsave(&rinfo->ring_lock, flags);
@@ -2650,17 +2539,18 @@ static void purge_persistent_grants(struct blkfront_info *info)
list_for_each_entry_safe(gnt_list_entry, tmp, &rinfo->grants,
node) {
- if (gnt_list_entry->gref == GRANT_INVALID_REF ||
- gnttab_query_foreign_access(gnt_list_entry->gref))
+ if (gnt_list_entry->gref == INVALID_GRANT_REF ||
+ !gnttab_try_end_foreign_access(gnt_list_entry->gref))
continue;
list_del(&gnt_list_entry->node);
- gnttab_end_foreign_access(gnt_list_entry->gref, 0, 0UL);
rinfo->persistent_gnts_c--;
- gnt_list_entry->gref = GRANT_INVALID_REF;
- list_add_tail(&gnt_list_entry->node, &rinfo->grants);
+ gnt_list_entry->gref = INVALID_GRANT_REF;
+ list_add_tail(&gnt_list_entry->node, &grants);
}
+ list_splice_tail(&grants, &rinfo->grants);
+
spin_unlock_irqrestore(&rinfo->ring_lock, flags);
}
}
@@ -2670,6 +2560,13 @@ static void blkfront_delay_work(struct work_struct *work)
struct blkfront_info *info;
bool need_schedule_work = false;
+ /*
+ * Note that when using bounce buffers but not persistent grants
+ * there's no need to run blkfront_delay_work because grants are
+ * revoked in blkif_completion or else an error is reported and the
+ * connection is closed.
+ */
+
mutex_lock(&blkfront_mutex);
list_for_each_entry(info, &info_list, info_list) {