Diffstat (limited to 'drivers/block/xen-blkback/common.h')
-rw-r--r--  drivers/block/xen-blkback/common.h | 313
1 file changed, 185 insertions(+), 128 deletions(-)
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index 60103e2517ba..b427d54bc120 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -36,19 +36,36 @@
#include <linux/io.h>
#include <linux/rbtree.h>
#include <asm/setup.h>
-#include <asm/pgalloc.h>
#include <asm/hypervisor.h>
#include <xen/grant_table.h>
+#include <xen/page.h>
#include <xen/xenbus.h>
#include <xen/interface/io/ring.h>
#include <xen/interface/io/blkif.h>
#include <xen/interface/io/protocols.h>
-#define DRV_PFX "xen-blkback:"
-#define DPRINTK(fmt, args...) \
- pr_debug(DRV_PFX "(%s:%d) " fmt ".\n", \
- __func__, __LINE__, ##args)
+extern unsigned int xen_blkif_max_ring_order;
+extern unsigned int xenblk_max_queues;
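
Both of these knobs are exposed as xen-blkback module parameters
(max_ring_page_order and max_queues). A hedged sketch of how they are
typically wired up; the exact types and permission bits here are
assumptions, not copied from the driver:

        module_param_named(max_ring_page_order, xen_blkif_max_ring_order,
                           uint, 0444);
        MODULE_PARM_DESC(max_ring_page_order,
                         "Maximum order of pages used for the shared ring");
        module_param_named(max_queues, xenblk_max_queues, uint, 0644);
        MODULE_PARM_DESC(max_queues,
                         "Maximum number of hardware queues per virtual disk");
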
+/*
+ * This is the maximum number of segments allowed in indirect
+ * requests. This value is also advertised to the frontend.
+ */
+#define MAX_INDIRECT_SEGMENTS 256
+
+/*
+ * Xen uses 4K pages. The guest may use a different page size (4K or 64K).
+ * Number of Xen pages per segment.
+ */
+#define XEN_PAGES_PER_SEGMENT (PAGE_SIZE / XEN_PAGE_SIZE)
+
+#define XEN_PAGES_PER_INDIRECT_FRAME \
+ (XEN_PAGE_SIZE/sizeof(struct blkif_request_segment))
+#define SEGS_PER_INDIRECT_FRAME \
+ (XEN_PAGES_PER_INDIRECT_FRAME / XEN_PAGES_PER_SEGMENT)
+#define MAX_INDIRECT_PAGES \
+ ((MAX_INDIRECT_SEGMENTS + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
+#define INDIRECT_PAGES(_segs) DIV_ROUND_UP(_segs, XEN_PAGES_PER_INDIRECT_FRAME)
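
A worked example of the macros above, assuming
sizeof(struct blkif_request_segment) == 8 (a 4-byte grant_ref_t plus
first_sect/last_sect and padding):

        /* 4K guest pages: PAGE_SIZE == XEN_PAGE_SIZE == 4096 */
        XEN_PAGES_PER_SEGMENT        = 4096 / 4096       = 1
        XEN_PAGES_PER_INDIRECT_FRAME = 4096 / 8          = 512
        SEGS_PER_INDIRECT_FRAME      = 512 / 1           = 512
        MAX_INDIRECT_PAGES           = (256 + 511) / 512 = 1

        /* 64K guest pages: PAGE_SIZE == 65536, XEN_PAGE_SIZE == 4096 */
        XEN_PAGES_PER_SEGMENT        = 65536 / 4096      = 16
        SEGS_PER_INDIRECT_FRAME      = 512 / 16          = 32
        MAX_INDIRECT_PAGES           = (256 + 31) / 32   = 8
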
/* Not a real protocol. Used to generate ring structs which contain
* the elements common to all protocols only. This way we get a
@@ -57,9 +74,8 @@
struct blkif_common_request {
char dummy;
};
-struct blkif_common_response {
- char dummy;
-};
+
+/* i386 protocol version */
struct blkif_x86_32_request_rw {
uint8_t nr_segments; /* number of segments */
@@ -83,29 +99,40 @@ struct blkif_x86_32_request_other {
uint64_t id; /* private guest value, echoed in resp */
} __attribute__((__packed__));
+struct blkif_x86_32_request_indirect {
+ uint8_t indirect_op;
+ uint16_t nr_segments;
+ uint64_t id;
+ blkif_sector_t sector_number;
+ blkif_vdev_t handle;
+ uint16_t _pad1;
+ grant_ref_t indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
+ /*
+ * The maximum number of indirect segments (and pages) that will
+ * be used is determined by MAX_INDIRECT_SEGMENTS; this value
+ * is also exported to the guest (via xenstore
+ * feature-max-indirect-segments entry), so the frontend knows how
+ * many indirect segments the backend supports.
+ */
+ uint64_t _pad2; /* make it 64 byte aligned */
+} __attribute__((__packed__));
+
struct blkif_x86_32_request {
uint8_t operation; /* BLKIF_OP_??? */
union {
struct blkif_x86_32_request_rw rw;
struct blkif_x86_32_request_discard discard;
struct blkif_x86_32_request_other other;
+ struct blkif_x86_32_request_indirect indirect;
} u;
} __attribute__((__packed__));
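
How a backend consumes such an indirect request: a condensed sketch
modelled on blkback's xen_blkbk_parse_indirect(), assuming
XEN_PAGES_PER_SEGMENT == 1 (4K guest pages) and that pages[] already
holds the frames grant-mapped from req->u.indirect.indirect_grefs[]
(the mapping step and error handling are elided):

        struct blkif_request_segment *segments = NULL;
        int n, i, nseg = pending_req->nr_segs;

        for (n = 0; n < nseg; n++) {
                if ((n % SEGS_PER_INDIRECT_FRAME) == 0) {
                        /* crossed into the next indirect page */
                        if (segments)
                                kunmap_atomic(segments);
                        segments = kmap_atomic(
                                pages[n / SEGS_PER_INDIRECT_FRAME]->page);
                }
                i = n % SEGS_PER_INDIRECT_FRAME;
                /* segments[i].gref, .first_sect and .last_sect now
                 * describe data segment n of the original request. */
        }
        if (segments)
                kunmap_atomic(segments);
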
-/* i386 protocol version */
-#pragma pack(push, 4)
-struct blkif_x86_32_response {
- uint64_t id; /* copied from request */
- uint8_t operation; /* copied from request */
- int16_t status; /* BLKIF_RSP_??? */
-};
-#pragma pack(pop)
/* x86_64 protocol version */
struct blkif_x86_64_request_rw {
uint8_t nr_segments; /* number of segments */
blkif_vdev_t handle; /* only for read/write requests */
- uint32_t _pad1; /* offsetof(blkif_reqest..,u.rw.id)==8 */
+ uint32_t _pad1; /* offsetof(blkif_request..,u.rw.id)==8 */
uint64_t id;
blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
@@ -127,27 +154,41 @@ struct blkif_x86_64_request_other {
uint64_t id; /* private guest value, echoed in resp */
} __attribute__((__packed__));
+struct blkif_x86_64_request_indirect {
+ uint8_t indirect_op;
+ uint16_t nr_segments;
+ uint32_t _pad1; /* offsetof(blkif_..,u.indirect.id)==8 */
+ uint64_t id;
+ blkif_sector_t sector_number;
+ blkif_vdev_t handle;
+ uint16_t _pad2;
+ grant_ref_t indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
+ /*
+ * The maximum number of indirect segments (and pages) that will
+ * be used is determined by MAX_INDIRECT_SEGMENTS; this value
+ * is also exported to the guest (via xenstore
+ * feature-max-indirect-segments entry), so the frontend knows how
+ * many indirect segments the backend supports.
+ */
+ uint32_t _pad3; /* make it 64 byte aligned */
+} __attribute__((__packed__));
+
struct blkif_x86_64_request {
uint8_t operation; /* BLKIF_OP_??? */
union {
struct blkif_x86_64_request_rw rw;
struct blkif_x86_64_request_discard discard;
struct blkif_x86_64_request_other other;
+ struct blkif_x86_64_request_indirect indirect;
} u;
} __attribute__((__packed__));
-struct blkif_x86_64_response {
- uint64_t __attribute__((__aligned__(8))) id;
- uint8_t operation; /* copied from request */
- int16_t status; /* BLKIF_RSP_??? */
-};
-
DEFINE_RING_TYPES(blkif_common, struct blkif_common_request,
- struct blkif_common_response);
+ struct blkif_response);
DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request,
- struct blkif_x86_32_response);
+ struct blkif_response __packed);
DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request,
- struct blkif_x86_64_response);
+ struct blkif_response);
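
DEFINE_RING_TYPES(name, req_t, rsp_t), from xen/interface/io/ring.h,
generates struct name_sring (the shared-page layout) plus the
name_front_ring/name_back_ring bookkeeping types. A minimal sketch of
how the backend could initialise the protocol-specific view over the
mapped ring pages (modelled on blkback's ring setup; nr_grefs, the
number of granted ring pages, is an assumption of this sketch):

        switch (blkif->blk_protocol) {
        case BLKIF_PROTOCOL_NATIVE:
                BACK_RING_INIT(&ring->blk_rings.native,
                               (struct blkif_sring *)ring->blk_ring,
                               XEN_PAGE_SIZE * nr_grefs);
                break;
        case BLKIF_PROTOCOL_X86_32:
                BACK_RING_INIT(&ring->blk_rings.x86_32,
                               (struct blkif_x86_32_sring *)ring->blk_ring,
                               XEN_PAGE_SIZE * nr_grefs);
                break;
        case BLKIF_PROTOCOL_X86_64:
                BACK_RING_INIT(&ring->blk_rings.x86_64,
                               (struct blkif_x86_64_sring *)ring->blk_ring,
                               XEN_PAGE_SIZE * nr_grefs);
                break;
        }

The blkif_back_rings union that follows exists precisely so all three
views can share one field.
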
union blkif_back_rings {
struct blkif_back_ring native;
@@ -162,6 +203,15 @@ enum blkif_protocol {
BLKIF_PROTOCOL_X86_64 = 3,
};
+/*
+ * Default protocol if the frontend doesn't specify one.
+ */
+#ifdef CONFIG_X86
+# define BLKIF_PROTOCOL_DEFAULT BLKIF_PROTOCOL_X86_32
+#else
+# define BLKIF_PROTOCOL_DEFAULT BLKIF_PROTOCOL_NATIVE
+#endif
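
The frontend advertises its ABI in the xenstore "protocol" node;
absent that node, the backend falls back to the default above. A
hedged sketch of the negotiation as it might appear in blkback's
connect path, using the XEN_IO_PROTO_ABI_* strings from
xen/interface/io/protocols.h (dev is assumed to be the backend's
xenbus_device):

        char protocol[64];

        blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT;
        if (xenbus_scanf(XBT_NIL, dev->otherend, "protocol",
                         "%63s", protocol) > 0) {
                if (!strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
                        blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
                else if (!strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
                        blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
                else if (!strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
                        blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
                else
                        return -ENOSYS; /* unknown ABI string */
        }
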
+
struct xen_vbd {
/* What the domain refers to this vbd as. */
blkif_vdev_t handle;
@@ -171,78 +221,152 @@ struct xen_vbd {
unsigned char type;
/* phys device that this vbd maps to. */
u32 pdevice;
- struct block_device *bdev;
+ struct file *bdev_file;
/* Cached size parameter. */
sector_t size;
unsigned int flush_support:1;
unsigned int discard_secure:1;
+ /* Connect-time cached feature_persistent parameter value */
+ unsigned int feature_gnt_persistent_parm:1;
+ /* Persistent grants feature negotiation result */
unsigned int feature_gnt_persistent:1;
unsigned int overflow_max_grants:1;
};
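
The two persistent-grant bits above capture a common negotiation
pattern: feature_gnt_persistent_parm latches the backend's module
parameter at connect time, and feature_gnt_persistent records the AND
of that with the frontend's advertisement. A hedged sketch
(feature_persistent stands in for the module parameter;
"feature-persistent" is the blkif xenstore node):

        vbd->feature_gnt_persistent_parm = feature_persistent;
        vbd->feature_gnt_persistent =
                vbd->feature_gnt_persistent_parm &&
                xenbus_read_unsigned(dev->otherend, "feature-persistent", 0);
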
struct backend_info;
+/* Number of requests that we can fit in a single ring page */
+#define XEN_BLKIF_REQS_PER_PAGE 32
struct persistent_gnt {
struct page *page;
grant_ref_t gnt;
grant_handle_t handle;
+ unsigned long last_used;
+ bool active;
struct rb_node node;
+ struct list_head remove_node;
+};
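
last_used, active and remove_node support LRU-style reclaim of
persistent grants, using the per-ring fields defined just below. A
condensed, hedged sketch of the purge pass (modelled on blkback's
purge logic; locking, batching and the two-pass aging heuristic are
elided, and some_timeout is a hypothetical threshold):

        struct rb_node *n = rb_first(&ring->persistent_gnts);

        while (n) {
                struct persistent_gnt *gnt =
                        rb_entry(n, struct persistent_gnt, node);

                n = rb_next(n); /* fetch next before a possible erase */
                if (gnt->active)
                        continue; /* referenced by an in-flight request */
                if (time_before(jiffies, gnt->last_used + some_timeout))
                        continue; /* recently used; keep it cached */
                rb_erase(&gnt->node, &ring->persistent_gnts);
                ring->persistent_gnt_c--;
                list_add(&gnt->remove_node, &ring->persistent_purge_list);
        }
        /* The actual unmapping runs later from the kworker. */
        schedule_work(&ring->persistent_purge_work);
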
+
+/* Per-ring information. */
+struct xen_blkif_ring {
+ /* Physical parameters of the comms window. */
+ unsigned int irq;
+ union blkif_back_rings blk_rings;
+ void *blk_ring;
+ /* Private fields. */
+ spinlock_t blk_ring_lock;
+
+ wait_queue_head_t wq;
+ atomic_t inflight;
+ bool active;
+ /* One thread per blkif ring. */
+ struct task_struct *xenblkd;
+ unsigned int waiting_reqs;
+
+ /* List of all available 'pending_req' entries */
+ struct list_head pending_free;
+ /* And its spinlock. */
+ spinlock_t pending_free_lock;
+ wait_queue_head_t pending_free_wq;
+
+ /* Tree to store persistent grants. */
+ struct rb_root persistent_gnts;
+ unsigned int persistent_gnt_c;
+ atomic_t persistent_gnt_in_use;
+ unsigned long next_lru;
+
+ /* Statistics. */
+ unsigned long st_print;
+ unsigned long long st_rd_req;
+ unsigned long long st_wr_req;
+ unsigned long long st_oo_req;
+ unsigned long long st_f_req;
+ unsigned long long st_ds_req;
+ unsigned long long st_rd_sect;
+ unsigned long long st_wr_sect;
+
+ /* Used by the kworker that offloads work from the persistent purge. */
+ struct list_head persistent_purge_list;
+ struct work_struct persistent_purge_work;
+
+ /* Buffer of free pages to map grant refs. */
+ struct gnttab_page_cache free_pages;
+
+ struct work_struct free_work;
+ /* Thread shutdown wait queue. */
+ wait_queue_head_t shutdown_wq;
+ struct xen_blkif *blkif;
};
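
Each ring gets its own kernel thread (xenblkd). A minimal sketch of
its main loop, modelled on xen_blkif_schedule() (purge scheduling and
statistics printing are elided; do_block_io_op is blkback's ring
consumer):

        while (!kthread_should_stop()) {
                wait_event_interruptible(ring->wq,
                                         ring->waiting_reqs ||
                                         kthread_should_stop());
                wait_event_interruptible(ring->pending_free_wq,
                                         !list_empty(&ring->pending_free) ||
                                         kthread_should_stop());

                ring->waiting_reqs = 0;
                smp_mb(); /* clear flag *before* checking for work */

                if (do_block_io_op(ring)) /* ring not yet drained */
                        ring->waiting_reqs = 1;
        }
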
struct xen_blkif {
/* Unique identifier for this interface. */
domid_t domid;
unsigned int handle;
- /* Physical parameters of the comms window. */
- unsigned int irq;
/* Comms information. */
enum blkif_protocol blk_protocol;
- union blkif_back_rings blk_rings;
- void *blk_ring;
/* The VBD attached to this interface. */
struct xen_vbd vbd;
/* Back pointer to the backend_info. */
struct backend_info *be;
- /* Private fields. */
- spinlock_t blk_ring_lock;
atomic_t refcnt;
-
- wait_queue_head_t wq;
/* for barrier (drain) requests */
struct completion drain_complete;
atomic_t drain;
- /* One thread per one blkif. */
- struct task_struct *xenblkd;
- unsigned int waiting_reqs;
- /* tree to store persistent grants */
- struct rb_root persistent_gnts;
- unsigned int persistent_gnt_c;
+ struct work_struct free_work;
+ unsigned int nr_ring_pages;
+ bool multi_ref;
+ /* All rings for this device. */
+ struct xen_blkif_ring *rings;
+ unsigned int nr_rings;
+ unsigned long buffer_squeeze_end;
+};
- /* statistics */
- unsigned long st_print;
- unsigned long long st_rd_req;
- unsigned long long st_wr_req;
- unsigned long long st_oo_req;
- unsigned long long st_f_req;
- unsigned long long st_ds_req;
- unsigned long long st_rd_sect;
- unsigned long long st_wr_sect;
-
- wait_queue_head_t waiting_to_free;
+struct seg_buf {
+ unsigned long offset;
+ unsigned int nsec;
+};
+
+struct grant_page {
+ struct page *page;
+ struct persistent_gnt *persistent_gnt;
+ grant_handle_t handle;
+ grant_ref_t gref;
+};
+
+/*
+ * Each outstanding request that we've passed to the lower device layers has a
+ * 'pending_req' allocated to it. Each bio that completes decrements
+ * the pendcnt towards zero. When it hits zero, the specified domain has a
+ * response queued for it, with the saved 'id' passed back.
+ */
+struct pending_req {
+ struct xen_blkif_ring *ring;
+ u64 id;
+ int nr_segs;
+ atomic_t pendcnt;
+ unsigned short operation;
+ int status;
+ struct list_head free_list;
+ struct grant_page *segments[MAX_INDIRECT_SEGMENTS];
+ /* Indirect descriptors */
+ struct grant_page *indirect_pages[MAX_INDIRECT_PAGES];
+ struct seg_buf seg[MAX_INDIRECT_SEGMENTS];
+ struct bio *biolist[MAX_INDIRECT_SEGMENTS];
+ struct gnttab_unmap_grant_ref unmap[MAX_INDIRECT_SEGMENTS];
+ struct page *unmap_pages[MAX_INDIRECT_SEGMENTS];
+ struct gntab_unmap_queue_data gnttab_unmap_data;
};
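
A hedged sketch of the completion side described in the comment above:
each bio's end_io callback drops pendcnt, and only the last one
triggers the unmap-and-respond step (modelled on blkback's
__end_block_io_op() and xen_blkbk_unmap_and_respond()):

        static void end_block_io_op(struct bio *bio)
        {
                struct pending_req *pending_req = bio->bi_private;

                if (bio->bi_status)
                        pending_req->status = BLKIF_RSP_ERROR;

                /* Last outstanding bio: unmap the grants and queue a
                 * response carrying the saved pending_req->id. */
                if (atomic_dec_and_test(&pending_req->pendcnt))
                        xen_blkbk_unmap_and_respond(pending_req);

                bio_put(bio);
        }
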
-#define vbd_sz(_v) ((_v)->bdev->bd_part ? \
- (_v)->bdev->bd_part->nr_sects : \
- get_capacity((_v)->bdev->bd_disk))
+#define vbd_sz(_v) bdev_nr_sectors(file_bdev((_v)->bdev_file))
#define xen_blkif_get(_b) (atomic_inc(&(_b)->refcnt))
#define xen_blkif_put(_b) \
do { \
if (atomic_dec_and_test(&(_b)->refcnt)) \
- wake_up(&(_b)->waiting_to_free);\
+ schedule_work(&(_b)->free_work);\
} while (0)
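
Since the final reference drop now schedules free_work instead of
waking a sleeper, xen_blkif_put() is safe from contexts that cannot
sleep. A short usage sketch:

        xen_blkif_get(blkif);  /* e.g. before handing blkif to a ring thread */
        ...
        xen_blkif_put(blkif);  /* final put schedules blkif->free_work */
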
struct phys_req {
@@ -251,12 +375,16 @@ struct phys_req {
struct block_device *bdev;
blkif_sector_t sector_number;
};
+
int xen_blkif_interface_init(void);
+void xen_blkif_interface_fini(void);
int xen_blkif_xenbus_init(void);
+void xen_blkif_xenbus_fini(void);
irqreturn_t xen_blkif_be_int(int irq, void *dev_id);
int xen_blkif_schedule(void *arg);
+void xen_blkbk_free_caches(struct xen_blkif_ring *ring);
int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
struct backend_info *be, int state);
@@ -264,77 +392,6 @@ int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
int xen_blkbk_barrier(struct xenbus_transaction xbt,
struct backend_info *be, int state);
struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be);
-
-static inline void blkif_get_x86_32_req(struct blkif_request *dst,
- struct blkif_x86_32_request *src)
-{
- int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
- dst->operation = src->operation;
- switch (src->operation) {
- case BLKIF_OP_READ:
- case BLKIF_OP_WRITE:
- case BLKIF_OP_WRITE_BARRIER:
- case BLKIF_OP_FLUSH_DISKCACHE:
- dst->u.rw.nr_segments = src->u.rw.nr_segments;
- dst->u.rw.handle = src->u.rw.handle;
- dst->u.rw.id = src->u.rw.id;
- dst->u.rw.sector_number = src->u.rw.sector_number;
- barrier();
- if (n > dst->u.rw.nr_segments)
- n = dst->u.rw.nr_segments;
- for (i = 0; i < n; i++)
- dst->u.rw.seg[i] = src->u.rw.seg[i];
- break;
- case BLKIF_OP_DISCARD:
- dst->u.discard.flag = src->u.discard.flag;
- dst->u.discard.id = src->u.discard.id;
- dst->u.discard.sector_number = src->u.discard.sector_number;
- dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
- break;
- default:
- /*
- * Don't know how to translate this op. Only get the
- * ID so failure can be reported to the frontend.
- */
- dst->u.other.id = src->u.other.id;
- break;
- }
-}
-
-static inline void blkif_get_x86_64_req(struct blkif_request *dst,
- struct blkif_x86_64_request *src)
-{
- int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
- dst->operation = src->operation;
- switch (src->operation) {
- case BLKIF_OP_READ:
- case BLKIF_OP_WRITE:
- case BLKIF_OP_WRITE_BARRIER:
- case BLKIF_OP_FLUSH_DISKCACHE:
- dst->u.rw.nr_segments = src->u.rw.nr_segments;
- dst->u.rw.handle = src->u.rw.handle;
- dst->u.rw.id = src->u.rw.id;
- dst->u.rw.sector_number = src->u.rw.sector_number;
- barrier();
- if (n > dst->u.rw.nr_segments)
- n = dst->u.rw.nr_segments;
- for (i = 0; i < n; i++)
- dst->u.rw.seg[i] = src->u.rw.seg[i];
- break;
- case BLKIF_OP_DISCARD:
- dst->u.discard.flag = src->u.discard.flag;
- dst->u.discard.id = src->u.discard.id;
- dst->u.discard.sector_number = src->u.discard.sector_number;
- dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
- break;
- default:
- /*
- * Don't know how to translate this op. Only get the
- * ID so failure can be reported to the frontend.
- */
- dst->u.other.id = src->u.other.id;
- break;
- }
-}
+void xen_blkbk_unmap_purged_grants(struct work_struct *work);
#endif /* __XEN_BLKIF__BACKEND__COMMON_H__ */