summaryrefslogtreecommitdiff
path: root/include/uapi/linux/raid
diff options
context:
space:
mode:
authorShaohua Li <shli@fb.com>2015-08-13 14:31:59 -0700
committerNeilBrown <neilb@suse.com>2015-10-24 17:16:19 +1100
commitf6bed0ef0a808164f51197de062e0450ce6c1f96 (patch)
tree9fc2ec276f40ef36eaa14e295f543595472b56a9 /include/uapi/linux/raid
parentb70abcb24711d1327a8a505ab3e931c24cbab0a7 (diff)
raid5: add basic stripe log
This introduces a simple log for raid5. Data/parity writing to raid array first writes to the log, then write to raid array disks. If crash happens, we can recovery data from the log. This can speed up raid resync and fix write hole issue. The log structure is pretty simple. Data/meta data is stored in block unit, which is 4k generally. It has only one type of meta data block. The meta data block can track 3 types of data, stripe data, stripe parity and flush block. MD superblock will point to the last valid meta data block. Each meta data block has checksum/seq number, so recovery can scan the log correctly. We store a checksum of stripe data/parity to the metadata block, so meta data and stripe data/parity can be written to log disk together. otherwise, meta data write must wait till stripe data/parity is finished. For stripe data, meta data block will record stripe data sector and size. Currently the size is always 4k. This meta data record can be made simpler if we just fix write hole (eg, we can record data of a stripe's different disks together), but this format can be extended to support caching in the future, which must record data address/size. For stripe parity, meta data block will record stripe sector. It's size should be 4k (for raid5) or 8k (for raid6). We always store p parity first. This format should work for caching too. flush block indicates a stripe is in raid array disks. Fixing write hole doesn't need this type of meta data, it's for caching extension. Signed-off-by: Shaohua Li <shli@fb.com> Signed-off-by: NeilBrown <neilb@suse.com>
Diffstat (limited to 'include/uapi/linux/raid')
-rw-r--r--include/uapi/linux/raid/md_p.h58
1 files changed, 58 insertions, 0 deletions
diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h
index a5f54ff26c20..96e4196f9c79 100644
--- a/include/uapi/linux/raid/md_p.h
+++ b/include/uapi/linux/raid/md_p.h
@@ -324,4 +324,62 @@ struct mdp_superblock_1 {
|MD_FEATURE_CLUSTERED \
)
+struct r5l_payload_header {
+ __le16 type;
+ __le16 flags;
+} __attribute__ ((__packed__));
+
+enum r5l_payload_type {
+ R5LOG_PAYLOAD_DATA = 0,
+ R5LOG_PAYLOAD_PARITY = 1,
+ R5LOG_PAYLOAD_FLUSH = 2,
+};
+
+struct r5l_payload_data_parity {
+ struct r5l_payload_header header;
+ __le32 size; /* sector. data/parity size. each 4k
+ * has a checksum */
+ __le64 location; /* sector. For data, it's raid sector. For
+ * parity, it's stripe sector */
+ __le32 checksum[];
+} __attribute__ ((__packed__));
+
+enum r5l_payload_data_parity_flag {
+ R5LOG_PAYLOAD_FLAG_DISCARD = 1, /* payload is discard */
+ /*
+ * RESHAPED/RESHAPING is only set when there is reshape activity. Note,
+ * both data/parity of a stripe should have the same flag set
+ *
+ * RESHAPED: reshape is running, and this stripe finished reshape
+ * RESHAPING: reshape is running, and this stripe isn't reshaped
+ */
+ R5LOG_PAYLOAD_FLAG_RESHAPED = 2,
+ R5LOG_PAYLOAD_FLAG_RESHAPING = 3,
+};
+
+struct r5l_payload_flush {
+ struct r5l_payload_header header;
+ __le32 size; /* flush_stripes size, bytes */
+ __le64 flush_stripes[];
+} __attribute__ ((__packed__));
+
+enum r5l_payload_flush_flag {
+ R5LOG_PAYLOAD_FLAG_FLUSH_STRIPE = 1, /* data represents whole stripe */
+};
+
+struct r5l_meta_block {
+ __le32 magic;
+ __le32 checksum;
+ __u8 version;
+ __u8 __zero_pading_1;
+ __le16 __zero_pading_2;
+ __le32 meta_size; /* whole size of the block */
+
+ __le64 seq;
+ __le64 position; /* sector, start from rdev->data_offset, current position */
+ struct r5l_payload_header payloads[];
+} __attribute__ ((__packed__));
+
+#define R5LOG_VERSION 0x1
+#define R5LOG_MAGIC 0x6433c509
#endif