summaryrefslogtreecommitdiff
path: root/include/linux/ceph/osd_client.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/ceph/osd_client.h')
-rw-r--r--include/linux/ceph/osd_client.h93
1 files changed, 90 insertions, 3 deletions
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index fb6be72104df..bf9823956758 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -29,14 +29,62 @@ typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *);
#define CEPH_HOMELESS_OSD -1
-/* a given osd we're communicating with */
+/*
+ * A single extent in a SPARSE_READ reply.
+ *
+ * Note that these come from the OSD as little-endian values. On BE arches,
+ * we convert them in-place after receipt.
+ */
+struct ceph_sparse_extent {
+ u64 off;
+ u64 len;
+} __packed;
+
+/* Sparse read state machine state values */
+enum ceph_sparse_read_state {
+ CEPH_SPARSE_READ_HDR = 0,
+ CEPH_SPARSE_READ_EXTENTS,
+ CEPH_SPARSE_READ_DATA_LEN,
+ CEPH_SPARSE_READ_DATA,
+};
+
+/*
+ * A SPARSE_READ reply is a 32-bit count of extents, followed by an array of
+ * 64-bit offset/length pairs, and then all of the actual file data
+ * concatenated after it (sans holes).
+ *
+ * Unfortunately, we don't know how long the extent array is until we've
+ * started reading the data section of the reply. The caller should send down
+ * a destination buffer for the array, but we'll alloc one if it's too small
+ * or if the caller doesn't.
+ */
+struct ceph_sparse_read {
+ enum ceph_sparse_read_state sr_state; /* state machine state */
+ u64 sr_req_off; /* orig request offset */
+ u64 sr_req_len; /* orig request length */
+ u64 sr_pos; /* current pos in buffer */
+ int sr_index; /* current extent index */
+ __le32 sr_datalen; /* length of actual data */
+ u32 sr_count; /* extent count in reply */
+ int sr_ext_len; /* length of extent array */
+ struct ceph_sparse_extent *sr_extent; /* extent array */
+};
+
+/*
+ * A given osd we're communicating with.
+ *
+ * Note that the o_requests tree can be searched while holding the "lock" mutex
+ * or the "o_requests_lock" spinlock. Insertion or removal requires both!
+ */
struct ceph_osd {
refcount_t o_ref;
+ int o_sparse_op_idx;
struct ceph_osd_client *o_osdc;
int o_osd;
int o_incarnation;
struct rb_node o_node;
struct ceph_connection o_con;
+ spinlock_t o_requests_lock;
struct rb_root o_requests;
struct rb_root o_linger_requests;
struct rb_root o_backoff_mappings;
@@ -46,6 +94,7 @@ struct ceph_osd {
unsigned long lru_ttl;
struct list_head o_keepalive_item;
struct mutex lock;
+ struct ceph_sparse_read o_sparse_read;
};
#define CEPH_OSD_SLAB_OPS 2
@@ -59,6 +108,7 @@ enum ceph_osd_data_type {
CEPH_OSD_DATA_TYPE_BIO,
#endif /* CONFIG_BLOCK */
CEPH_OSD_DATA_TYPE_BVECS,
+ CEPH_OSD_DATA_TYPE_ITER,
};
struct ceph_osd_data {
@@ -82,6 +132,7 @@ struct ceph_osd_data {
struct ceph_bvec_iter bvec_pos;
u32 num_bvecs;
};
+ struct iov_iter iter;
};
};
@@ -98,6 +149,8 @@ struct ceph_osd_req_op {
u64 offset, length;
u64 truncate_size;
u32 truncate_seq;
+ int sparse_ext_cnt;
+ struct ceph_sparse_extent *sparse_ext;
struct ceph_osd_data osd_data;
} extent;
struct {
@@ -145,6 +198,9 @@ struct ceph_osd_req_op {
u32 src_fadvise_flags;
struct ceph_osd_data osd_data;
} copy_from;
+ struct {
+ u64 ver;
+ } assert_ver;
};
};
@@ -199,6 +255,7 @@ struct ceph_osd_request {
struct ceph_osd_client *r_osdc;
struct kref r_kref;
bool r_mempool;
+ bool r_linger; /* don't resend on failure */
struct completion r_completion; /* private to osd_client.c */
ceph_osdc_callback_t r_callback;
@@ -211,9 +268,9 @@ struct ceph_osd_request {
struct ceph_snap_context *r_snapc; /* for writes */
struct timespec64 r_mtime; /* ditto */
u64 r_data_offset; /* ditto */
- bool r_linger; /* don't resend on failure */
/* internal */
+ u64 r_version; /* data version sent in reply */
unsigned long r_stamp; /* jiffies, send or check time */
unsigned long r_start_stamp; /* jiffies */
ktime_t r_start_latency; /* ktime_t */
@@ -450,6 +507,8 @@ void osd_req_op_extent_osd_data_bvecs(struct ceph_osd_request *osd_req,
void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req,
unsigned int which,
struct ceph_bvec_iter *bvec_pos);
+void osd_req_op_extent_osd_iter(struct ceph_osd_request *osd_req,
+ unsigned int which, struct iov_iter *iter);
extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *,
unsigned int which,
@@ -504,6 +563,20 @@ extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
u32 truncate_seq, u64 truncate_size,
bool use_mempool);
+int __ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt);
+
+/*
+ * How big an extent array should we preallocate for a sparse read? This is
+ * just a starting value. If we get more than this back from the OSD, the
+ * receiver will reallocate.
+ */
+#define CEPH_SPARSE_EXT_ARRAY_INITIAL 16
+
+static inline int ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op)
+{
+ return __ceph_alloc_sparse_ext_map(op, CEPH_SPARSE_EXT_ARRAY_INITIAL);
+}
+
extern void ceph_osdc_get_request(struct ceph_osd_request *req);
extern void ceph_osdc_put_request(struct ceph_osd_request *req);
@@ -558,5 +631,19 @@ int ceph_osdc_list_watchers(struct ceph_osd_client *osdc,
struct ceph_object_locator *oloc,
struct ceph_watch_item **watchers,
u32 *num_watchers);
-#endif
+/* Find offset into the buffer of the end of the extent map */
+static inline u64 ceph_sparse_ext_map_end(struct ceph_osd_req_op *op)
+{
+ struct ceph_sparse_extent *ext;
+
+ /* No extents? No data */
+ if (op->extent.sparse_ext_cnt == 0)
+ return 0;
+
+ ext = &op->extent.sparse_ext[op->extent.sparse_ext_cnt - 1];
+
+ return ext->off + ext->len - op->extent.offset;
+}
+
+#endif