diff options
author | Jens Axboe <axboe@kernel.dk> | 2020-09-25 07:48:20 -0600 |
---|---|---|
committer | Jens Axboe <axboe@kernel.dk> | 2020-09-25 07:48:20 -0600 |
commit | 163090c14a42778c3ccfbdaf39133129bea68632 (patch) | |
tree | 8517646317457beb0a9a3662320686ac46cfea6a | |
parent | 0905053bdb5b7ba77ad0c2e5cfc4787c1db3d4f1 (diff) | |
parent | d3ee2d8415a6256c1c41e1be36e80e640c3e6359 (diff) |
Merge branch 'md-next' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md into for-5.10/drivers
Pull MD updates from Song.
* 'md-next' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md:
md/raid10: improve discard request for far layout
md/raid10: improve raid10 discard request
md/raid10: pull codes that wait for blocked dev into one function
md/raid10: extend r10bio devs to raid disks
md: add md_submit_discard_bio() for submitting discard bio
md: Simplify code with existing definition RESYNC_SECTORS in raid10.c
md/raid5: reallocate page array after setting new stripe_size
md/raid5: resize stripe_head when reshape array
md/raid5: let multiple devices of stripe_head share page
md/raid6: let async recovery function support different page offset
md/raid6: let syndrome computor support different page offset
md/raid5: convert to new xor compution interface
md/raid5: add new xor function to support different page offset
md/raid5: make async_copy_data() to support different page offset
md/raid5: add a new member of offset into r5dev
md: only calculate blocksize once and use i_blocksize()
-rw-r--r-- | crypto/async_tx/async_pq.c | 72 | ||||
-rw-r--r-- | crypto/async_tx/async_raid6_recov.c | 163 | ||||
-rw-r--r-- | crypto/async_tx/async_xor.c | 120 | ||||
-rw-r--r-- | crypto/async_tx/raid6test.c | 24 | ||||
-rw-r--r-- | drivers/md/md-bitmap.c | 7 | ||||
-rw-r--r-- | drivers/md/md.c | 20 | ||||
-rw-r--r-- | drivers/md/md.h | 2 | ||||
-rw-r--r-- | drivers/md/raid0.c | 14 | ||||
-rw-r--r-- | drivers/md/raid10.c | 431 | ||||
-rw-r--r-- | drivers/md/raid10.h | 1 | ||||
-rw-r--r-- | drivers/md/raid5.c | 274 | ||||
-rw-r--r-- | drivers/md/raid5.h | 29 | ||||
-rw-r--r-- | include/linux/async_tx.h | 23 |
13 files changed, 967 insertions, 213 deletions
diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c index 341ece61cf9b..f9cdc5e91664 100644 --- a/crypto/async_tx/async_pq.c +++ b/crypto/async_tx/async_pq.c @@ -104,7 +104,7 @@ do_async_gen_syndrome(struct dma_chan *chan, * do_sync_gen_syndrome - synchronously calculate a raid6 syndrome */ static void -do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks, +do_sync_gen_syndrome(struct page **blocks, unsigned int *offsets, int disks, size_t len, struct async_submit_ctl *submit) { void **srcs; @@ -121,7 +121,8 @@ do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks, BUG_ON(i > disks - 3); /* P or Q can't be zero */ srcs[i] = (void*)raid6_empty_zero_page; } else { - srcs[i] = page_address(blocks[i]) + offset; + srcs[i] = page_address(blocks[i]) + offsets[i]; + if (i < disks - 2) { stop = i; if (start == -1) @@ -138,10 +139,23 @@ do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks, async_tx_sync_epilog(submit); } +static inline bool +is_dma_pq_aligned_offs(struct dma_device *dev, unsigned int *offs, + int src_cnt, size_t len) +{ + int i; + + for (i = 0; i < src_cnt; i++) { + if (!is_dma_pq_aligned(dev, offs[i], 0, len)) + return false; + } + return true; +} + /** * async_gen_syndrome - asynchronously calculate a raid6 syndrome * @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1 - * @offset: common offset into each block (src and dest) to start transaction + * @offsets: offset array into each block (src and dest) to start transaction * @disks: number of blocks (including missing P or Q, see below) * @len: length of operation in bytes * @submit: submission/completion modifiers @@ -160,7 +174,7 @@ do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks, * path. */ struct dma_async_tx_descriptor * -async_gen_syndrome(struct page **blocks, unsigned int offset, int disks, +async_gen_syndrome(struct page **blocks, unsigned int *offsets, int disks, size_t len, struct async_submit_ctl *submit) { int src_cnt = disks - 2; @@ -179,7 +193,7 @@ async_gen_syndrome(struct page **blocks, unsigned int offset, int disks, if (unmap && !(submit->flags & ASYNC_TX_PQ_XOR_DST) && (src_cnt <= dma_maxpq(device, 0) || dma_maxpq(device, DMA_PREP_CONTINUE) > 0) && - is_dma_pq_aligned(device, offset, 0, len)) { + is_dma_pq_aligned_offs(device, offsets, disks, len)) { struct dma_async_tx_descriptor *tx; enum dma_ctrl_flags dma_flags = 0; unsigned char coefs[MAX_DISKS]; @@ -196,8 +210,8 @@ async_gen_syndrome(struct page **blocks, unsigned int offset, int disks, for (i = 0, j = 0; i < src_cnt; i++) { if (blocks[i] == NULL) continue; - unmap->addr[j] = dma_map_page(device->dev, blocks[i], offset, - len, DMA_TO_DEVICE); + unmap->addr[j] = dma_map_page(device->dev, blocks[i], + offsets[i], len, DMA_TO_DEVICE); coefs[j] = raid6_gfexp[i]; unmap->to_cnt++; j++; @@ -210,7 +224,8 @@ async_gen_syndrome(struct page **blocks, unsigned int offset, int disks, unmap->bidi_cnt++; if (P(blocks, disks)) unmap->addr[j++] = dma_map_page(device->dev, P(blocks, disks), - offset, len, DMA_BIDIRECTIONAL); + P(offsets, disks), + len, DMA_BIDIRECTIONAL); else { unmap->addr[j++] = 0; dma_flags |= DMA_PREP_PQ_DISABLE_P; @@ -219,7 +234,8 @@ async_gen_syndrome(struct page **blocks, unsigned int offset, int disks, unmap->bidi_cnt++; if (Q(blocks, disks)) unmap->addr[j++] = dma_map_page(device->dev, Q(blocks, disks), - offset, len, DMA_BIDIRECTIONAL); + Q(offsets, disks), + len, DMA_BIDIRECTIONAL); else { unmap->addr[j++] = 0; dma_flags |= DMA_PREP_PQ_DISABLE_Q; @@ -240,13 +256,13 @@ async_gen_syndrome(struct page **blocks, unsigned int offset, int disks, if (!P(blocks, disks)) { P(blocks, disks) = pq_scribble_page; - BUG_ON(len + offset > PAGE_SIZE); + P(offsets, disks) = 0; } if (!Q(blocks, disks)) { Q(blocks, disks) = pq_scribble_page; - BUG_ON(len + offset > PAGE_SIZE); + Q(offsets, disks) = 0; } - do_sync_gen_syndrome(blocks, offset, disks, len, submit); + do_sync_gen_syndrome(blocks, offsets, disks, len, submit); return NULL; } @@ -270,6 +286,7 @@ pq_val_chan(struct async_submit_ctl *submit, struct page **blocks, int disks, si * @len: length of operation in bytes * @pqres: on val failure SUM_CHECK_P_RESULT and/or SUM_CHECK_Q_RESULT are set * @spare: temporary result buffer for the synchronous case + * @s_off: spare buffer page offset * @submit: submission / completion modifiers * * The same notes from async_gen_syndrome apply to the 'blocks', @@ -278,9 +295,9 @@ pq_val_chan(struct async_submit_ctl *submit, struct page **blocks, int disks, si * specified. */ struct dma_async_tx_descriptor * -async_syndrome_val(struct page **blocks, unsigned int offset, int disks, +async_syndrome_val(struct page **blocks, unsigned int *offsets, int disks, size_t len, enum sum_check_flags *pqres, struct page *spare, - struct async_submit_ctl *submit) + unsigned int s_off, struct async_submit_ctl *submit) { struct dma_chan *chan = pq_val_chan(submit, blocks, disks, len); struct dma_device *device = chan ? chan->device : NULL; @@ -295,7 +312,7 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks, unmap = dmaengine_get_unmap_data(device->dev, disks, GFP_NOWAIT); if (unmap && disks <= dma_maxpq(device, 0) && - is_dma_pq_aligned(device, offset, 0, len)) { + is_dma_pq_aligned_offs(device, offsets, disks, len)) { struct device *dev = device->dev; dma_addr_t pq[2]; int i, j = 0, src_cnt = 0; @@ -307,7 +324,7 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks, for (i = 0; i < disks-2; i++) if (likely(blocks[i])) { unmap->addr[j] = dma_map_page(dev, blocks[i], - offset, len, + offsets[i], len, DMA_TO_DEVICE); coefs[j] = raid6_gfexp[i]; unmap->to_cnt++; @@ -320,7 +337,7 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks, dma_flags |= DMA_PREP_PQ_DISABLE_P; } else { pq[0] = dma_map_page(dev, P(blocks, disks), - offset, len, + P(offsets, disks), len, DMA_TO_DEVICE); unmap->addr[j++] = pq[0]; unmap->to_cnt++; @@ -330,7 +347,7 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks, dma_flags |= DMA_PREP_PQ_DISABLE_Q; } else { pq[1] = dma_map_page(dev, Q(blocks, disks), - offset, len, + Q(offsets, disks), len, DMA_TO_DEVICE); unmap->addr[j++] = pq[1]; unmap->to_cnt++; @@ -355,7 +372,9 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks, async_tx_submit(chan, tx, submit); } else { struct page *p_src = P(blocks, disks); + unsigned int p_off = P(offsets, disks); struct page *q_src = Q(blocks, disks); + unsigned int q_off = Q(offsets, disks); enum async_tx_flags flags_orig = submit->flags; dma_async_tx_callback cb_fn_orig = submit->cb_fn; void *scribble = submit->scribble; @@ -381,27 +400,32 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks, if (p_src) { init_async_submit(submit, ASYNC_TX_XOR_ZERO_DST, NULL, NULL, NULL, scribble); - tx = async_xor(spare, blocks, offset, disks-2, len, submit); + tx = async_xor_offs(spare, s_off, + blocks, offsets, disks-2, len, submit); async_tx_quiesce(&tx); - p = page_address(p_src) + offset; - s = page_address(spare) + offset; + p = page_address(p_src) + p_off; + s = page_address(spare) + s_off; *pqres |= !!memcmp(p, s, len) << SUM_CHECK_P; } if (q_src) { P(blocks, disks) = NULL; Q(blocks, disks) = spare; + Q(offsets, disks) = s_off; init_async_submit(submit, 0, NULL, NULL, NULL, scribble); - tx = async_gen_syndrome(blocks, offset, disks, len, submit); + tx = async_gen_syndrome(blocks, offsets, disks, + len, submit); async_tx_quiesce(&tx); - q = page_address(q_src) + offset; - s = page_address(spare) + offset; + q = page_address(q_src) + q_off; + s = page_address(spare) + s_off; *pqres |= !!memcmp(q, s, len) << SUM_CHECK_Q; } /* restore P, Q and submit */ P(blocks, disks) = p_src; + P(offsets, disks) = p_off; Q(blocks, disks) = q_src; + Q(offsets, disks) = q_off; submit->cb_fn = cb_fn_orig; submit->cb_param = cb_param_orig; diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c index f249142ceac4..354b8cd5537f 100644 --- a/crypto/async_tx/async_raid6_recov.c +++ b/crypto/async_tx/async_raid6_recov.c @@ -15,8 +15,9 @@ #include <linux/dmaengine.h> static struct dma_async_tx_descriptor * -async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef, - size_t len, struct async_submit_ctl *submit) +async_sum_product(struct page *dest, unsigned int d_off, + struct page **srcs, unsigned int *src_offs, unsigned char *coef, + size_t len, struct async_submit_ctl *submit) { struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ, &dest, 1, srcs, 2, len); @@ -37,11 +38,14 @@ async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef, if (submit->flags & ASYNC_TX_FENCE) dma_flags |= DMA_PREP_FENCE; - unmap->addr[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE); - unmap->addr[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE); + unmap->addr[0] = dma_map_page(dev, srcs[0], src_offs[0], + len, DMA_TO_DEVICE); + unmap->addr[1] = dma_map_page(dev, srcs[1], src_offs[1], + len, DMA_TO_DEVICE); unmap->to_cnt = 2; - unmap->addr[2] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL); + unmap->addr[2] = dma_map_page(dev, dest, d_off, + len, DMA_BIDIRECTIONAL); unmap->bidi_cnt = 1; /* engine only looks at Q, but expects it to follow P */ pq[1] = unmap->addr[2]; @@ -66,9 +70,9 @@ async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef, async_tx_quiesce(&submit->depend_tx); amul = raid6_gfmul[coef[0]]; bmul = raid6_gfmul[coef[1]]; - a = page_address(srcs[0]); - b = page_address(srcs[1]); - c = page_address(dest); + a = page_address(srcs[0]) + src_offs[0]; + b = page_address(srcs[1]) + src_offs[1]; + c = page_address(dest) + d_off; while (len--) { ax = amul[*a++]; @@ -80,8 +84,9 @@ async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef, } static struct dma_async_tx_descriptor * -async_mult(struct page *dest, struct page *src, u8 coef, size_t len, - struct async_submit_ctl *submit) +async_mult(struct page *dest, unsigned int d_off, struct page *src, + unsigned int s_off, u8 coef, size_t len, + struct async_submit_ctl *submit) { struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ, &dest, 1, &src, 1, len); @@ -101,9 +106,11 @@ async_mult(struct page *dest, struct page *src, u8 coef, size_t len, if (submit->flags & ASYNC_TX_FENCE) dma_flags |= DMA_PREP_FENCE; - unmap->addr[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE); + unmap->addr[0] = dma_map_page(dev, src, s_off, + len, DMA_TO_DEVICE); unmap->to_cnt++; - unmap->addr[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL); + unmap->addr[1] = dma_map_page(dev, dest, d_off, + len, DMA_BIDIRECTIONAL); dma_dest[1] = unmap->addr[1]; unmap->bidi_cnt++; unmap->len = len; @@ -133,8 +140,8 @@ async_mult(struct page *dest, struct page *src, u8 coef, size_t len, */ async_tx_quiesce(&submit->depend_tx); qmul = raid6_gfmul[coef]; - d = page_address(dest); - s = page_address(src); + d = page_address(dest) + d_off; + s = page_address(src) + s_off; while (len--) *d++ = qmul[*s++]; @@ -144,11 +151,14 @@ async_mult(struct page *dest, struct page *src, u8 coef, size_t len, static struct dma_async_tx_descriptor * __2data_recov_4(int disks, size_t bytes, int faila, int failb, - struct page **blocks, struct async_submit_ctl *submit) + struct page **blocks, unsigned int *offs, + struct async_submit_ctl *submit) { struct dma_async_tx_descriptor *tx = NULL; struct page *p, *q, *a, *b; + unsigned int p_off, q_off, a_off, b_off; struct page *srcs[2]; + unsigned int src_offs[2]; unsigned char coef[2]; enum async_tx_flags flags = submit->flags; dma_async_tx_callback cb_fn = submit->cb_fn; @@ -156,26 +166,34 @@ __2data_recov_4(int disks, size_t bytes, int faila, int failb, void *scribble = submit->scribble; p = blocks[disks-2]; + p_off = offs[disks-2]; q = blocks[disks-1]; + q_off = offs[disks-1]; a = blocks[faila]; + a_off = offs[faila]; b = blocks[failb]; + b_off = offs[failb]; /* in the 4 disk case P + Pxy == P and Q + Qxy == Q */ /* Dx = A*(P+Pxy) + B*(Q+Qxy) */ srcs[0] = p; + src_offs[0] = p_off; srcs[1] = q; + src_offs[1] = q_off; coef[0] = raid6_gfexi[failb-faila]; coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]; init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); - tx = async_sum_product(b, srcs, coef, bytes, submit); + tx = async_sum_product(b, b_off, srcs, src_offs, coef, bytes, submit); /* Dy = P+Pxy+Dx */ srcs[0] = p; + src_offs[0] = p_off; srcs[1] = b; + src_offs[1] = b_off; init_async_submit(submit, flags | ASYNC_TX_XOR_ZERO_DST, tx, cb_fn, cb_param, scribble); - tx = async_xor(a, srcs, 0, 2, bytes, submit); + tx = async_xor_offs(a, a_off, srcs, src_offs, 2, bytes, submit); return tx; @@ -183,11 +201,14 @@ __2data_recov_4(int disks, size_t bytes, int faila, int failb, static struct dma_async_tx_descriptor * __2data_recov_5(int disks, size_t bytes, int faila, int failb, - struct page **blocks, struct async_submit_ctl *submit) + struct page **blocks, unsigned int *offs, + struct async_submit_ctl *submit) { struct dma_async_tx_descriptor *tx = NULL; struct page *p, *q, *g, *dp, *dq; + unsigned int p_off, q_off, g_off, dp_off, dq_off; struct page *srcs[2]; + unsigned int src_offs[2]; unsigned char coef[2]; enum async_tx_flags flags = submit->flags; dma_async_tx_callback cb_fn = submit->cb_fn; @@ -208,60 +229,77 @@ __2data_recov_5(int disks, size_t bytes, int faila, int failb, BUG_ON(good_srcs > 1); p = blocks[disks-2]; + p_off = offs[disks-2]; q = blocks[disks-1]; + q_off = offs[disks-1]; g = blocks[good]; + g_off = offs[good]; /* Compute syndrome with zero for the missing data pages * Use the dead data pages as temporary storage for delta p and * delta q */ dp = blocks[faila]; + dp_off = offs[faila]; dq = blocks[failb]; + dq_off = offs[failb]; init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); - tx = async_memcpy(dp, g, 0, 0, bytes, submit); + tx = async_memcpy(dp, g, dp_off, g_off, bytes, submit); init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); - tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit); + tx = async_mult(dq, dq_off, g, g_off, + raid6_gfexp[good], bytes, submit); /* compute P + Pxy */ srcs[0] = dp; + src_offs[0] = dp_off; srcs[1] = p; + src_offs[1] = p_off; init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, scribble); - tx = async_xor(dp, srcs, 0, 2, bytes, submit); + tx = async_xor_offs(dp, dp_off, srcs, src_offs, 2, bytes, submit); /* compute Q + Qxy */ srcs[0] = dq; + src_offs[0] = dq_off; srcs[1] = q; + src_offs[1] = q_off; init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, scribble); - tx = async_xor(dq, srcs, 0, 2, bytes, submit); + tx = async_xor_offs(dq, dq_off, srcs, src_offs, 2, bytes, submit); /* Dx = A*(P+Pxy) + B*(Q+Qxy) */ srcs[0] = dp; + src_offs[0] = dp_off; srcs[1] = dq; + src_offs[1] = dq_off; coef[0] = raid6_gfexi[failb-faila]; coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]; init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); - tx = async_sum_product(dq, srcs, coef, bytes, submit); + tx = async_sum_product(dq, dq_off, srcs, src_offs, coef, bytes, submit); /* Dy = P+Pxy+Dx */ srcs[0] = dp; + src_offs[0] = dp_off; srcs[1] = dq; + src_offs[1] = dq_off; init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn, cb_param, scribble); - tx = async_xor(dp, srcs, 0, 2, bytes, submit); + tx = async_xor_offs(dp, dp_off, srcs, src_offs, 2, bytes, submit); return tx; } static struct dma_async_tx_descriptor * __2data_recov_n(int disks, size_t bytes, int faila, int failb, - struct page **blocks, struct async_submit_ctl *submit) + struct page **blocks, unsigned int *offs, + struct async_submit_ctl *submit) { struct dma_async_tx_descriptor *tx = NULL; struct page *p, *q, *dp, *dq; + unsigned int p_off, q_off, dp_off, dq_off; struct page *srcs[2]; + unsigned int src_offs[2]; unsigned char coef[2]; enum async_tx_flags flags = submit->flags; dma_async_tx_callback cb_fn = submit->cb_fn; @@ -269,56 +307,74 @@ __2data_recov_n(int disks, size_t bytes, int faila, int failb, void *scribble = submit->scribble; p = blocks[disks-2]; + p_off = offs[disks-2]; q = blocks[disks-1]; + q_off = offs[disks-1]; /* Compute syndrome with zero for the missing data pages * Use the dead data pages as temporary storage for * delta p and delta q */ dp = blocks[faila]; + dp_off = offs[faila]; blocks[faila] = NULL; blocks[disks-2] = dp; + offs[disks-2] = dp_off; dq = blocks[failb]; + dq_off = offs[failb]; blocks[failb] = NULL; blocks[disks-1] = dq; + offs[disks-1] = dq_off; init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); - tx = async_gen_syndrome(blocks, 0, disks, bytes, submit); + tx = async_gen_syndrome(blocks, offs, disks, bytes, submit); /* Restore pointer table */ blocks[faila] = dp; + offs[faila] = dp_off; blocks[failb] = dq; + offs[failb] = dq_off; blocks[disks-2] = p; + offs[disks-2] = p_off; blocks[disks-1] = q; + offs[disks-1] = q_off; /* compute P + Pxy */ srcs[0] = dp; + src_offs[0] = dp_off; srcs[1] = p; + src_offs[1] = p_off; init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, scribble); - tx = async_xor(dp, srcs, 0, 2, bytes, submit); + tx = async_xor_offs(dp, dp_off, srcs, src_offs, 2, bytes, submit); /* compute Q + Qxy */ srcs[0] = dq; + src_offs[0] = dq_off; srcs[1] = q; + src_offs[1] = q_off; init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, scribble); - tx = async_xor(dq, srcs, 0, 2, bytes, submit); + tx = async_xor_offs(dq, dq_off, srcs, src_offs, 2, bytes, submit); /* Dx = A*(P+Pxy) + B*(Q+Qxy) */ srcs[0] = dp; + src_offs[0] = dp_off; srcs[1] = dq; + src_offs[1] = dq_off; coef[0] = raid6_gfexi[failb-faila]; coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]; init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); - tx = async_sum_product(dq, srcs, coef, bytes, submit); + tx = async_sum_product(dq, dq_off, srcs, src_offs, coef, bytes, submit); /* Dy = P+Pxy+Dx */ srcs[0] = dp; + src_offs[0] = dp_off; srcs[1] = dq; + src_offs[1] = dq_off; init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn, cb_param, scribble); - tx = async_xor(dp, srcs, 0, 2, bytes, submit); + tx = async_xor_offs(dp, dp_off, srcs, src_offs, 2, bytes, submit); return tx; } @@ -330,11 +386,13 @@ __2data_recov_n(int disks, size_t bytes, int faila, int failb, * @faila: first failed drive index * @failb: second failed drive index * @blocks: array of source pointers where the last two entries are p and q + * @offs: array of offset for pages in blocks * @submit: submission/completion modifiers */ struct dma_async_tx_descriptor * async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb, - struct page **blocks, struct async_submit_ctl *submit) + struct page **blocks, unsigned int *offs, + struct async_submit_ctl *submit) { void *scribble = submit->scribble; int non_zero_srcs, i; @@ -358,7 +416,7 @@ async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb, if (blocks[i] == NULL) ptrs[i] = (void *) raid6_empty_zero_page; else - ptrs[i] = page_address(blocks[i]); + ptrs[i] = page_address(blocks[i]) + offs[i]; raid6_2data_recov(disks, bytes, faila, failb, ptrs); @@ -383,16 +441,19 @@ async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb, * explicitly handle the special case of a 4 disk array with * both data disks missing. */ - return __2data_recov_4(disks, bytes, faila, failb, blocks, submit); + return __2data_recov_4(disks, bytes, faila, failb, + blocks, offs, submit); case 3: /* dma devices do not uniformly understand a single * source pq operation (in contrast to the synchronous * case), so explicitly handle the special case of a 5 disk * array with 2 of 3 data disks missing. */ - return __2data_recov_5(disks, bytes, faila, failb, blocks, submit); + return __2data_recov_5(disks, bytes, faila, failb, + blocks, offs, submit); default: - return __2data_recov_n(disks, bytes, faila, failb, blocks, submit); + return __2data_recov_n(disks, bytes, faila, failb, + blocks, offs, submit); } } EXPORT_SYMBOL_GPL(async_raid6_2data_recov); @@ -403,14 +464,17 @@ EXPORT_SYMBOL_GPL(async_raid6_2data_recov); * @bytes: block size * @faila: failed drive index * @blocks: array of source pointers where the last two entries are p and q + * @offs: array of offset for pages in blocks * @submit: submission/completion modifiers */ struct dma_async_tx_descriptor * async_raid6_datap_recov(int disks, size_t bytes, int faila, - struct page **blocks, struct async_submit_ctl *submit) + struct page **blocks, unsigned int *offs, + struct async_submit_ctl *submit) { struct dma_async_tx_descriptor *tx = NULL; struct page *p, *q, *dq; + unsigned int p_off, q_off, dq_off; u8 coef; enum async_tx_flags flags = submit->flags; dma_async_tx_callback cb_fn = submit->cb_fn; @@ -418,6 +482,7 @@ async_raid6_datap_recov(int disks, size_t bytes, int faila, void *scribble = submit->scribble; int good_srcs, good, i; struct page *srcs[2]; + unsigned int src_offs[2]; pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes); @@ -434,7 +499,7 @@ async_raid6_datap_recov(int disks, size_t bytes, int faila, if (blocks[i] == NULL) ptrs[i] = (void*)raid6_empty_zero_page; else - ptrs[i] = page_address(blocks[i]); + ptrs[i] = page_address(blocks[i]) + offs[i]; raid6_datap_recov(disks, bytes, faila, ptrs); @@ -458,55 +523,67 @@ async_raid6_datap_recov(int disks, size_t bytes, int faila, BUG_ON(good_srcs == 0); p = blocks[disks-2]; + p_off = offs[disks-2]; q = blocks[disks-1]; + q_off = offs[disks-1]; /* Compute syndrome with zero for the missing data page * Use the dead data page as temporary storage for delta q */ dq = blocks[faila]; + dq_off = offs[faila]; blocks[faila] = NULL; blocks[disks-1] = dq; + offs[disks-1] = dq_off; /* in the 4-disk case we only need to perform a single source * multiplication with the one good data block. */ if (good_srcs == 1) { struct page *g = blocks[good]; + unsigned int g_off = offs[good]; init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); - tx = async_memcpy(p, g, 0, 0, bytes, submit); + tx = async_memcpy(p, g, p_off, g_off, bytes, submit); init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); - tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit); + tx = async_mult(dq, dq_off, g, g_off, + raid6_gfexp[good], bytes, submit); } else { init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); - tx = async_gen_syndrome(blocks, 0, disks, bytes, submit); + tx = async_gen_syndrome(blocks, offs, disks, bytes, submit); } /* Restore pointer table */ blocks[faila] = dq; + offs[faila] = dq_off; blocks[disks-1] = q; + offs[disks-1] = q_off; /* calculate g^{-faila} */ coef = raid6_gfinv[raid6_gfexp[faila]]; srcs[0] = dq; + src_offs[0] = dq_off; srcs[1] = q; + src_offs[1] = q_off; init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, scribble); - tx = async_xor(dq, srcs, 0, 2, bytes, submit); + tx = async_xor_offs(dq, dq_off, srcs, src_offs, 2, bytes, submit); init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble); - tx = async_mult(dq, dq, coef, bytes, submit); + tx = async_mult(dq, dq_off, dq, dq_off, coef, bytes, submit); srcs[0] = p; + src_offs[0] = p_off; srcs[1] = dq; + src_offs[1] = dq_off; init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn, cb_param, scribble); - tx = async_xor(p, srcs, 0, 2, bytes, submit); + tx = async_xor_offs(p, p_off, srcs, src_offs, 2, bytes, submit); return tx; } diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index 4e5eebe52e6a..a057ecb1288d 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c @@ -97,7 +97,8 @@ do_async_xor(struct dma_chan *chan, struct dmaengine_unmap_data *unmap, } static void -do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, +do_sync_xor_offs(struct page *dest, unsigned int offset, + struct page **src_list, unsigned int *src_offs, int src_cnt, size_t len, struct async_submit_ctl *submit) { int i; @@ -114,7 +115,8 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, /* convert to buffer pointers */ for (i = 0; i < src_cnt; i++) if (src_list[i]) - srcs[xor_src_cnt++] = page_address(src_list[i]) + offset; + srcs[xor_src_cnt++] = page_address(src_list[i]) + + (src_offs ? src_offs[i] : offset); src_cnt = xor_src_cnt; /* set destination address */ dest_buf = page_address(dest) + offset; @@ -135,11 +137,31 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, async_tx_sync_epilog(submit); } +static inline bool +dma_xor_aligned_offsets(struct dma_device *device, unsigned int offset, + unsigned int *src_offs, int src_cnt, int len) +{ + int i; + + if (!is_dma_xor_aligned(device, offset, 0, len)) + return false; + + if (!src_offs) + return true; + + for (i = 0; i < src_cnt; i++) { + if (!is_dma_xor_aligned(device, src_offs[i], 0, len)) + return false; + } + return true; +} + /** - * async_xor - attempt to xor a set of blocks with a dma engine. + * async_xor_offs - attempt to xor a set of blocks with a dma engine. * @dest: destination page + * @offset: dst offset to start transaction * @src_list: array of source pages - * @offset: common src/dst offset to start transaction + * @src_offs: array of source pages offset, NULL means common src/dst offset * @src_cnt: number of source pages * @len: length in bytes * @submit: submission / completion modifiers @@ -157,8 +179,9 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, * is not specified. */ struct dma_async_tx_descriptor * -async_xor(struct page *dest, struct page **src_list, unsigned int offset, - int src_cnt, size_t len, struct async_submit_ctl *submit) +async_xor_offs(struct page *dest, unsigned int offset, + struct page **src_list, unsigned int *src_offs, + int src_cnt, size_t len, struct async_submit_ctl *submit) { struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR, &dest, 1, src_list, @@ -171,7 +194,8 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset, if (device) unmap = dmaengine_get_unmap_data(device->dev, src_cnt+1, GFP_NOWAIT); - if (unmap && is_dma_xor_aligned(device, offset, 0, len)) { + if (unmap && dma_xor_aligned_offsets(device, offset, + src_offs, src_cnt, len)) { struct dma_async_tx_descriptor *tx; int i, j; @@ -184,7 +208,8 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset, continue; unmap->to_cnt++; unmap->addr[j++] = dma_map_page(device->dev, src_list[i], - offset, len, DMA_TO_DEVICE); + src_offs ? src_offs[i] : offset, + len, DMA_TO_DEVICE); } /* map it bidirectional as it may be re-used as a source */ @@ -213,11 +238,42 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset, /* wait for any prerequisite operations */ async_tx_quiesce(&submit->depend_tx); - do_sync_xor(dest, src_list, offset, src_cnt, len, submit); + do_sync_xor_offs(dest, offset, src_list, src_offs, + src_cnt, len, submit); return NULL; } } +EXPORT_SYMBOL_GPL(async_xor_offs); + +/** + * async_xor - attempt to xor a set of blocks with a dma engine. + * @dest: destination page + * @src_list: array of source pages + * @offset: common src/dst offset to start transaction + * @src_cnt: number of source pages + * @len: length in bytes + * @submit: submission / completion modifiers + * + * honored flags: ASYNC_TX_ACK, ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DST + * + * xor_blocks always uses the dest as a source so the + * ASYNC_TX_XOR_ZERO_DST flag must be set to not include dest data in + * the calculation. The assumption with dma eninges is that they only + * use the destination buffer as a source when it is explicity specified + * in the source list. + * + * src_list note: if the dest is also a source it must be at index zero. + * The contents of this array will be overwritten if a scribble region + * is not specified. + */ +struct dma_async_tx_descriptor * +async_xor(struct page *dest, struct page **src_list, unsigned int offset, + int src_cnt, size_t len, struct async_submit_ctl *submit) +{ + return async_xor_offs(dest, offset, src_list, NULL, + src_cnt, len, submit); +} EXPORT_SYMBOL_GPL(async_xor); static int page_is_zero(struct page *p, unsigned int offset, size_t len) @@ -237,10 +293,11 @@ xor_val_chan(struct async_submit_ctl *submit, struct page *dest, } /** - * async_xor_val - attempt a xor parity check with a dma engine. + * async_xor_val_offs - attempt a xor parity check with a dma engine. * @dest: destination page used if the xor is performed synchronously + * @offset: des offset in pages to start transaction * @src_list: array of source pages - * @offset: offset in pages to start transaction + * @src_offs: array of source pages offset, NULL means common src/det offset * @src_cnt: number of source pages * @len: length in bytes * @result: 0 if sum == 0 else non-zero @@ -253,9 +310,10 @@ xor_val_chan(struct async_submit_ctl *submit, struct page *dest, * is not specified. */ struct dma_async_tx_descriptor * -async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, - int src_cnt, size_t len, enum sum_check_flags *result, - struct async_submit_ctl *submit) +async_xor_val_offs(struct page *dest, unsigned int offset, + struct page **src_list, unsigned int *src_offs, + int src_cnt, size_t len, enum sum_check_flags *result, + struct async_submit_ctl *submit) { struct dma_chan *chan = xor_val_chan(submit, dest, src_list, src_cnt, len); struct dma_device *device = chan ? chan->device : NULL; @@ -268,7 +326,7 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, unmap = dmaengine_get_unmap_data(device->dev, src_cnt, GFP_NOWAIT); if (unmap && src_cnt <= device->max_xor && - is_dma_xor_aligned(device, offset, 0, len)) { + dma_xor_aligned_offsets(device, offset, src_offs, src_cnt, len)) { unsigned long dma_prep_flags = 0; int i; @@ -281,7 +339,8 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, for (i = 0; i < src_cnt; i++) { unmap->addr[i] = dma_map_page(device->dev, src_list[i], - offset, len, DMA_TO_DEVICE); + src_offs ? src_offs[i] : offset, + len, DMA_TO_DEVICE); unmap->to_cnt++; } unmap->len = len; @@ -312,7 +371,8 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, submit->flags |= ASYNC_TX_XOR_DROP_DST; submit->flags &= ~ASYNC_TX_ACK; - tx = async_xor(dest, src_list, offset, src_cnt, len, submit); + tx = async_xor_offs(dest, offset, src_list, src_offs, + src_cnt, len, submit); async_tx_quiesce(&tx); @@ -325,6 +385,32 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, return tx; } +EXPORT_SYMBOL_GPL(async_xor_val_offs); + +/** + * async_xor_val - attempt a xor parity check with a dma engine. + * @dest: destination page used if the xor is performed synchronously + * @src_list: array of source pages + * @offset: offset in pages to start transaction + * @src_cnt: number of source pages + * @len: length in bytes + * @result: 0 if sum == 0 else non-zero + * @submit: submission / completion modifiers + * + * honored flags: ASYNC_TX_ACK + * + * src_list note: if the dest is also a source it must be at index zero. + * The contents of this array will be overwritten if a scribble region + * is not specified. + */ +struct dma_async_tx_descriptor * +async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, + int src_cnt, size_t len, enum sum_check_flags *result, + struct async_submit_ctl *submit) +{ + return async_xor_val_offs(dest, offset, src_list, NULL, src_cnt, + len, result, submit); +} EXPORT_SYMBOL_GPL(async_xor_val); MODULE_AUTHOR("Intel Corporation"); diff --git a/crypto/async_tx/raid6test.c b/crypto/async_tx/raid6test.c index 14e73dcd7475..66db82e5a3b1 100644 --- a/crypto/async_tx/raid6test.c +++ b/crypto/async_tx/raid6test.c @@ -18,6 +18,7 @@ #define NDISKS 64 /* Including P and Q */ static struct page *dataptrs[NDISKS]; +unsigned int dataoffs[NDISKS]; static addr_conv_t addr_conv[NDISKS]; static struct page *data[NDISKS+3]; static struct page *spare; @@ -38,6 +39,7 @@ static void makedata(int disks) for (i = 0; i < disks; i++) { prandom_bytes(page_address(data[i]), PAGE_SIZE); dataptrs[i] = data[i]; + dataoffs[i] = 0; } } @@ -52,7 +54,8 @@ static char disk_type(int d, int disks) } /* Recover two failed blocks. */ -static void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, struct page **ptrs) +static void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, + struct page **ptrs, unsigned int *offs) { struct async_submit_ctl submit; struct completion cmp; @@ -66,7 +69,8 @@ static void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, stru if (faila == disks-2) { /* P+Q failure. Just rebuild the syndrome. */ init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv); - tx = async_gen_syndrome(ptrs, 0, disks, bytes, &submit); + tx = async_gen_syndrome(ptrs, offs, + disks, bytes, &submit); } else { struct page *blocks[NDISKS]; struct page *dest; @@ -89,22 +93,26 @@ static void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, stru tx = async_xor(dest, blocks, 0, count, bytes, &submit); init_async_submit(&submit, 0, tx, NULL, NULL, addr_conv); - tx = async_gen_syndrome(ptrs, 0, disks, bytes, &submit); + tx = async_gen_syndrome(ptrs, offs, + disks, bytes, &submit); } } else { if (failb == disks-2) { /* data+P failure. */ init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv); - tx = async_raid6_datap_recov(disks, bytes, faila, ptrs, &submit); + tx = async_raid6_datap_recov(disks, bytes, + faila, ptrs, offs, &submit); } else { /* data+data failure. */ init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv); - tx = async_raid6_2data_recov(disks, bytes, faila, failb, ptrs, &submit); + tx = async_raid6_2data_recov(disks, bytes, + faila, failb, ptrs, offs, &submit); } } init_completion(&cmp); init_async_submit(&submit, ASYNC_TX_ACK, tx, callback, &cmp, addr_conv); - tx = async_syndrome_val(ptrs, 0, disks, bytes, &result, spare, &submit); + tx = async_syndrome_val(ptrs, offs, + disks, bytes, &result, spare, 0, &submit); async_tx_issue_pending(tx); if (wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)) == 0) @@ -126,7 +134,7 @@ static int test_disks(int i, int j, int disks) dataptrs[i] = recovi; dataptrs[j] = recovj; - raid6_dual_recov(disks, PAGE_SIZE, i, j, dataptrs); + raid6_dual_recov(disks, PAGE_SIZE, i, j, dataptrs, dataoffs); erra = memcmp(page_address(data[i]), page_address(recovi), PAGE_SIZE); errb = memcmp(page_address(data[j]), page_address(recovj), PAGE_SIZE); @@ -162,7 +170,7 @@ static int test(int disks, int *tests) /* Generate assumed good syndrome */ init_completion(&cmp); init_async_submit(&submit, ASYNC_TX_ACK, NULL, callback, &cmp, addr_conv); - tx = async_gen_syndrome(dataptrs, 0, disks, PAGE_SIZE, &submit); + tx = async_gen_syndrome(dataptrs, dataoffs, disks, PAGE_SIZE, &submit); async_tx_issue_pending(tx); if (wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)) == 0) { diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index b10c51988c8e..55b757a223a4 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -357,11 +357,12 @@ static int read_page(struct file *file, unsigned long index, struct inode *inode = file_inode(file); struct buffer_head *bh; sector_t block, blk_cur; + unsigned long blocksize = i_blocksize(inode); pr_debug("read bitmap file (%dB @ %llu)\n", (int)PAGE_SIZE, (unsigned long long)index << PAGE_SHIFT); - bh = alloc_page_buffers(page, 1<<inode->i_blkbits, false); + bh = alloc_page_buffers(page, blocksize, false); if (!bh) { ret = -ENOMEM; goto out; @@ -383,10 +384,10 @@ static int read_page(struct file *file, unsigned long index, bh->b_blocknr = block; bh->b_bdev = inode->i_sb->s_bdev; - if (count < (1<<inode->i_blkbits)) + if (count < blocksize) count = 0; else - count -= (1<<inode->i_blkbits); + count -= blocksize; bh->b_end_io = end_bitmap_write; bh->b_private = bitmap; diff --git a/drivers/md/md.c b/drivers/md/md.c index 64bc22d2b606..dfff4bb2bb20 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -8583,6 +8583,26 @@ void md_write_end(struct mddev *mddev) EXPORT_SYMBOL(md_write_end); +/* This is used by raid0 and raid10 */ +void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev, + struct bio *bio, sector_t start, sector_t size) +{ + struct bio *discard_bio = NULL; + + if (__blkdev_issue_discard(rdev->bdev, start, size, + GFP_NOIO, 0, &discard_bio) || !discard_bio) + return; + + bio_chain(discard_bio, bio); + bio_clone_blkg_association(discard_bio, bio); + if (mddev->gendisk) + trace_block_bio_remap(bdev_get_queue(rdev->bdev), + discard_bio, disk_devt(mddev->gendisk), + bio->bi_iter.bi_sector); + submit_bio_noacct(discard_bio); +} +EXPORT_SYMBOL(md_submit_discard_bio); + /* md_allow_write(mddev) * Calling this ensures that the array is marked 'active' so that writes * may proceed without blocking. It is important to call this before diff --git a/drivers/md/md.h b/drivers/md/md.h index f9e2ccdd22c4..543366041911 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -713,6 +713,8 @@ extern void md_write_end(struct mddev *mddev); extern void md_done_sync(struct mddev *mddev, int blocks, int ok); extern void md_error(struct mddev *mddev, struct md_rdev *rdev); extern void md_finish_reshape(struct mddev *mddev); +extern void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev, + struct bio *bio, sector_t start, sector_t size); extern bool __must_check md_flush_request(struct mddev *mddev, struct bio *bio); extern void md_super_write(struct mddev *mddev, struct md_rdev *rdev, diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index aa2d72791768..e2de0cced707 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -494,7 +494,6 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio) for (disk = 0; disk < zone->nb_dev; disk++) { sector_t dev_start, dev_end; - struct bio *discard_bio = NULL; struct md_rdev *rdev; if (disk < start_disk_index) @@ -517,18 +516,9 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio) rdev = conf->devlist[(zone - conf->strip_zone) * conf->strip_zone[0].nb_dev + disk]; - if (__blkdev_issue_discard(rdev->bdev, + md_submit_discard_bio(mddev, rdev, bio, dev_start + zone->dev_start + rdev->data_offset, - dev_end - dev_start, GFP_NOIO, 0, &discard_bio) || - !discard_bio) - continue; - bio_chain(discard_bio, bio); - bio_clone_blkg_association(discard_bio, bio); - if (mddev->gendisk) - trace_block_bio_remap(bdev_get_queue(rdev->bdev), - discard_bio, disk_devt(mddev->gendisk), - bio->bi_iter.bi_sector); - submit_bio_noacct(discard_bio); + dev_end - dev_start); } bio_endio(bio); } diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 5d1bdee313ec..b7bca6703df8 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -91,7 +91,7 @@ static inline struct r10bio *get_resync_r10bio(struct bio *bio) static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data) { struct r10conf *conf = data; - int size = offsetof(struct r10bio, devs[conf->copies]); + int size = offsetof(struct r10bio, devs[conf->geo.raid_disks]); /* allocate a r10bio with room for raid_disks entries in the * bios array */ @@ -238,7 +238,7 @@ static void put_all_bios(struct r10conf *conf, struct r10bio *r10_bio) { int i; - for (i = 0; i < conf->copies; i++) { + for (i = 0; i < conf->geo.raid_disks; i++) { struct bio **bio = & r10_bio->devs[i].bio; if (!BIO_SPECIAL(*bio)) bio_put(*bio); @@ -327,7 +327,7 @@ static int find_bio_disk(struct r10conf *conf, struct r10bio *r10_bio, int slot; int repl = 0; - for (slot = 0; slot < conf->copies; slot++) { + for (slot = 0; slot < conf->geo.raid_disks; slot++) { if (r10_bio->devs[slot].bio == bio) break; if (r10_bio->devs[slot].repl_bio == bio) { @@ -336,7 +336,6 @@ static int find_bio_disk(struct r10conf *conf, struct r10bio *r10_bio, } } - BUG_ON(slot == conf->copies); update_head_pos(slot, r10_bio); if (slotp) @@ -1276,12 +1275,75 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio, } } +static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio) +{ + int i; + struct r10conf *conf = mddev->private; + struct md_rdev *blocked_rdev; + +retry_wait: + blocked_rdev = NULL; + rcu_read_lock(); + for (i = 0; i < conf->copies; i++) { + struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev); + struct md_rdev *rrdev = rcu_dereference( + conf->mirrors[i].replacement); + if (rdev == rrdev) + rrdev = NULL; + if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) { + atomic_inc(&rdev->nr_pending); + blocked_rdev = rdev; + break; + } + if (rrdev && unlikely(test_bit(Blocked, &rrdev->flags))) { + atomic_inc(&rrdev->nr_pending); + blocked_rdev = rrdev; + break; + } + + if (rdev && test_bit(WriteErrorSeen, &rdev->flags)) { + sector_t first_bad; + sector_t dev_sector = r10_bio->devs[i].addr; + int bad_sectors; + int is_bad; + + /* Discard request doesn't care the write result + * so it doesn't need to wait blocked disk here. + */ + if (!r10_bio->sectors) + continue; + + is_bad = is_badblock(rdev, dev_sector, r10_bio->sectors, + &first_bad, &bad_sectors); + if (is_bad < 0) { + /* Mustn't write here until the bad block + * is acknowledged + */ + atomic_inc(&rdev->nr_pending); + set_bit(BlockedBadBlocks, &rdev->flags); + blocked_rdev = rdev; + break; + } + } + } + rcu_read_unlock(); + + if (unlikely(blocked_rdev)) { + /* Have to wait for this device to get unblocked, then retry */ + allow_barrier(conf); + raid10_log(conf->mddev, "%s wait rdev %d blocked", + __func__, blocked_rdev->raid_disk); + md_wait_for_blocked_rdev(blocked_rdev, mddev); + wait_barrier(conf); + goto retry_wait; + } +} + static void raid10_write_request(struct mddev *mddev, struct bio *bio, struct r10bio *r10_bio) { struct r10conf *conf = mddev->private; int i; - struct md_rdev *blocked_rdev; sector_t sectors; int max_sectors; @@ -1339,8 +1401,9 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio, r10_bio->read_slot = -1; /* make sure repl_bio gets freed */ raid10_find_phys(conf, r10_bio); -retry_write: - blocked_rdev = NULL; + + wait_blocked_dev(mddev, r10_bio); + rcu_read_lock(); max_sectors = r10_bio->sectors; @@ -1351,16 +1414,6 @@ retry_write: conf->mirrors[d].replacement); if (rdev == rrdev) rrdev = NULL; - if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) { - atomic_inc(&rdev->nr_pending); - blocked_rdev = rdev; - break; - } - if (rrdev && unlikely(test_bit(Blocked, &rrdev->flags))) { - atomic_inc(&rrdev->nr_pending); - blocked_rdev = rrdev; - break; - } if (rdev && (test_bit(Faulty, &rdev->flags))) rdev = NULL; if (rrdev && (test_bit(Faulty, &rrdev->flags))) @@ -1381,15 +1434,6 @@ retry_write: is_bad = is_badblock(rdev, dev_sector, max_sectors, &first_bad, &bad_sectors); - if (is_bad < 0) { - /* Mustn't write here until the bad block - * is acknowledged - */ - atomic_inc(&rdev->nr_pending); - set_bit(BlockedBadBlocks, &rdev->flags); - blocked_rdev = rdev; - break; - } if (is_bad && first_bad <= dev_sector) { /* Cannot write here at all */ bad_sectors -= (dev_sector - first_bad); @@ -1425,35 +1469,6 @@ retry_write: } rcu_read_unlock(); - if (unlikely(blocked_rdev)) { - /* Have to wait for this device to get unblocked, then retry */ - int j; - int d; - - for (j = 0; j < i; j++) { - if (r10_bio->devs[j].bio) { - d = r10_bio->devs[j].devnum; - rdev_dec_pending(conf->mirrors[d].rdev, mddev); - } - if (r10_bio->devs[j].repl_bio) { - struct md_rdev *rdev; - d = r10_bio->devs[j].devnum; - rdev = conf->mirrors[d].replacement; - if (!rdev) { - /* Race with remove_disk */ - smp_mb(); - rdev = conf->mirrors[d].rdev; - } - rdev_dec_pending(rdev, mddev); - } - } - allow_barrier(conf); - raid10_log(conf->mddev, "wait rdev %d blocked", blocked_rdev->raid_disk); - md_wait_for_blocked_rdev(blocked_rdev, mddev); - wait_barrier(conf); - goto retry_write; - } - if (max_sectors < r10_bio->sectors) r10_bio->sectors = max_sectors; @@ -1493,7 +1508,7 @@ static void __make_request(struct mddev *mddev, struct bio *bio, int sectors) r10_bio->mddev = mddev; r10_bio->sector = bio->bi_iter.bi_sector; r10_bio->state = 0; - memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) * conf->copies); + memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) * conf->geo.raid_disks); if (bio_data_dir(bio) == READ) raid10_read_request(mddev, bio, r10_bio); @@ -1501,6 +1516,296 @@ static void __make_request(struct mddev *mddev, struct bio *bio, int sectors) raid10_write_request(mddev, bio, r10_bio); } +static struct bio *raid10_split_bio(struct r10conf *conf, + struct bio *bio, sector_t sectors, bool want_first) +{ + struct bio *split; + + split = bio_split(bio, sectors, GFP_NOIO, &conf->bio_split); + bio_chain(split, bio); + allow_barrier(conf); + if (want_first) { + submit_bio_noacct(bio); + bio = split; + } else + submit_bio_noacct(split); + wait_barrier(conf); + + return bio; +} + +static void raid_end_discard_bio(struct r10bio *r10bio) +{ + struct r10conf *conf = r10bio->mddev->private; + struct r10bio *first_r10bio; + + while (atomic_dec_and_test(&r10bio->remaining)) { + + allow_barrier(conf); + + if (!test_bit(R10BIO_Discard, &r10bio->state)) { + first_r10bio = (struct r10bio *)r10bio->master_bio; + free_r10bio(r10bio); + r10bio = first_r10bio; + } else { + md_write_end(r10bio->mddev); + bio_endio(r10bio->master_bio); + free_r10bio(r10bio); + break; + } + } +} + +static void raid10_end_discard_request(struct bio *bio) +{ + struct r10bio *r10_bio = bio->bi_private; + struct r10conf *conf = r10_bio->mddev->private; + struct md_rdev *rdev = NULL; + int dev; + int slot, repl; + + /* + * We don't care the return value of discard bio + */ + if (!test_bit(R10BIO_Uptodate, &r10_bio->state)) + set_bit(R10BIO_Uptodate, &r10_bio->state); + + dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl); + if (repl) + rdev = conf->mirrors[dev].replacement; + if (!rdev) { + /* raid10_remove_disk uses smp_mb to make sure rdev is set to + * replacement before setting replacement to NULL. It can read + * rdev first without barrier protect even replacment is NULL + */ + smp_rmb(); + rdev = conf->mirrors[dev].rdev; + } + + raid_end_discard_bio(r10_bio); + rdev_dec_pending(rdev, conf->mddev); +} + +/* There are some limitations to handle discard bio + * 1st, the discard size is bigger than stripe_size*2. + * 2st, if the discard bio spans reshape progress, we use the old way to + * handle discard bio + */ +static int raid10_handle_discard(struct mddev *mddev, struct bio *bio) +{ + struct r10conf *conf = mddev->private; + struct geom *geo = &conf->geo; + struct r10bio *r10_bio, *first_r10bio; + int far_copies = geo->far_copies; + bool first_copy = true; + + int disk; + sector_t chunk; + unsigned int stripe_size; + sector_t split_size; + + sector_t bio_start, bio_end; + sector_t first_stripe_index, last_stripe_index; + sector_t start_disk_offset; + unsigned int start_disk_index; + sector_t end_disk_offset; + unsigned int end_disk_index; + unsigned int remainder; + + if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) + return -EAGAIN; + + wait_barrier(conf); + + /* Check reshape again to avoid reshape happens after checking + * MD_RECOVERY_RESHAPE and before wait_barrier + */ + if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) + goto out; + + stripe_size = geo->raid_disks << geo->chunk_shift; + bio_start = bio->bi_iter.bi_sector; + bio_end = bio_end_sector(bio); + + /* Maybe one discard bio is smaller than strip size or across one stripe + * and discard region is larger than one stripe size. For far offset layout, + * if the discard region is not aligned with stripe size, there is hole + * when we submit discard bio to member disk. For simplicity, we only + * handle discard bio which discard region is bigger than stripe_size*2 + */ + if (bio_sectors(bio) < stripe_size*2) + goto out; + + /* For far and far offset layout, if bio is not aligned with stripe size, + * it splits the part that is not aligned with strip size. + */ + div_u64_rem(bio_start, stripe_size, &remainder); + if ((far_copies > 1) && remainder) { + split_size = stripe_size - remainder; + bio = raid10_split_bio(conf, bio, split_size, false); + } + div_u64_rem(bio_end, stripe_size, &remainder); + if ((far_copies > 1) && remainder) { + split_size = bio_sectors(bio) - remainder; + bio = raid10_split_bio(conf, bio, split_size, true); + } + + bio_start = bio->bi_iter.bi_sector; + bio_end = bio_end_sector(bio); + + /* raid10 uses chunk as the unit to store data. It's similar like raid0. + * One stripe contains the chunks from all member disk (one chunk from + * one disk at the same HBA address). For layout detail, see 'man md 4' + */ + chunk = bio_start >> geo->chunk_shift; + chunk *= geo->near_copies; + first_stripe_index = chunk; + start_disk_index = sector_div(first_stripe_index, geo->raid_disks); + if (geo->far_offset) + first_stripe_index *= geo->far_copies; + start_disk_offset = (bio_start & geo->chunk_mask) + + (first_stripe_index << geo->chunk_shift); + + chunk = bio_end >> geo->chunk_shift; + chunk *= geo->near_copies; + last_stripe_index = chunk; + end_disk_index = sector_div(last_stripe_index, geo->raid_disks); + if (geo->far_offset) + last_stripe_index *= geo->far_copies; + end_disk_offset = (bio_end & geo->chunk_mask) + + (last_stripe_index << geo->chunk_shift); + +retry_discard: + r10_bio = mempool_alloc(&conf->r10bio_pool, GFP_NOIO); + r10_bio->mddev = mddev; + r10_bio->state = 0; + r10_bio->sectors = 0; + memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) * geo->raid_disks); + wait_blocked_dev(mddev, r10_bio); + + /* For far layout it needs more than one r10bio to cover all regions. + * Inspired by raid10_sync_request, we can use the first r10bio->master_bio + * to record the discard bio. Other r10bio->master_bio record the first + * r10bio. The first r10bio only release after all other r10bios finish. + * The discard bio returns only first r10bio finishes + */ + if (first_copy) { + r10_bio->master_bio = bio; + set_bit(R10BIO_Discard, &r10_bio->state); + first_copy = false; + first_r10bio = r10_bio; + } else + r10_bio->master_bio = (struct bio *)first_r10bio; + + rcu_read_lock(); + for (disk = 0; disk < geo->raid_disks; disk++) { + struct md_rdev *rdev = rcu_dereference(conf->mirrors[disk].rdev); + struct md_rdev *rrdev = rcu_dereference( + conf->mirrors[disk].replacement); + + r10_bio->devs[disk].bio = NULL; + r10_bio->devs[disk].repl_bio = NULL; + + if (rdev && (test_bit(Faulty, &rdev->flags))) + rdev = NULL; + if (rrdev && (test_bit(Faulty, &rrdev->flags))) + rrdev = NULL; + if (!rdev && !rrdev) + continue; + + if (rdev) { + r10_bio->devs[disk].bio = bio; + atomic_inc(&rdev->nr_pending); + } + if (rrdev) { + r10_bio->devs[disk].repl_bio = bio; + atomic_inc(&rrdev->nr_pending); + } + } + rcu_read_unlock(); + + atomic_set(&r10_bio->remaining, 1); + for (disk = 0; disk < geo->raid_disks; disk++) { + sector_t dev_start, dev_end; + struct bio *mbio, *rbio = NULL; + struct md_rdev *rdev = rcu_dereference(conf->mirrors[disk].rdev); + struct md_rdev *rrdev = rcu_dereference( + conf->mirrors[disk].replacement); + + /* + * Now start to calculate the start and end address for each disk. + * The space between dev_start and dev_end is the discard region. + * + * For dev_start, it needs to consider three conditions: + * 1st, the disk is before start_disk, you can imagine the disk in + * the next stripe. So the dev_start is the start address of next + * stripe. + * 2st, the disk is after start_disk, it means the disk is at the + * same stripe of first disk + * 3st, the first disk itself, we can use start_disk_offset directly + */ + if (disk < start_disk_index) + dev_start = (first_stripe_index + 1) * mddev->chunk_sectors; + else if (disk > start_disk_index) + dev_start = first_stripe_index * mddev->chunk_sectors; + else + dev_start = start_disk_offset; + + if (disk < end_disk_index) + dev_end = (last_stripe_index + 1) * mddev->chunk_sectors; + else if (disk > end_disk_index) + dev_end = last_stripe_index * mddev->chunk_sectors; + else + dev_end = end_disk_offset; + + /* It only handles discard bio which size is >= stripe size, so + * dev_end > dev_start all the time + */ + if (r10_bio->devs[disk].bio) { + mbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set); + mbio->bi_end_io = raid10_end_discard_request; + mbio->bi_private = r10_bio; + r10_bio->devs[disk].bio = mbio; + r10_bio->devs[disk].devnum = disk; + atomic_inc(&r10_bio->remaining); + md_submit_discard_bio(mddev, rdev, mbio, + dev_start + choose_data_offset(r10_bio, rdev), + dev_end - dev_start); + bio_endio(mbio); + } + if (r10_bio->devs[disk].repl_bio) { + rbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set); + rbio->bi_end_io = raid10_end_discard_request; + rbio->bi_private = r10_bio; + r10_bio->devs[disk].repl_bio = rbio; + r10_bio->devs[disk].devnum = disk; + atomic_inc(&r10_bio->remaining); + md_submit_discard_bio(mddev, rrdev, rbio, + dev_start + choose_data_offset(r10_bio, rrdev), + dev_end - dev_start); + bio_endio(rbio); + } + } + + if (!geo->far_offset && --far_copies) { + first_stripe_index += geo->stride >> geo->chunk_shift; + start_disk_offset += geo->stride; + last_stripe_index += geo->stride >> geo->chunk_shift; + end_disk_offset += geo->stride; + atomic_inc(&first_r10bio->remaining); + raid_end_discard_bio(r10_bio); + wait_barrier(conf); + goto retry_discard; + } + + raid_end_discard_bio(r10_bio); + + return 0; +out: + allow_barrier(conf); + return -EAGAIN; +} + static bool raid10_make_request(struct mddev *mddev, struct bio *bio) { struct r10conf *conf = mddev->private; @@ -1515,6 +1820,10 @@ static bool raid10_make_request(struct mddev *mddev, struct bio *bio) if (!md_write_start(mddev, bio)) return false; + if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) + if (!raid10_handle_discard(mddev, bio)) + return true; + /* * If this request crosses a chunk boundary, we need to split * it. @@ -3754,7 +4063,7 @@ static int raid10_run(struct mddev *mddev) if (mddev->queue) { blk_queue_max_discard_sectors(mddev->queue, - mddev->chunk_sectors); + UINT_MAX); blk_queue_max_write_same_sectors(mddev->queue, 0); blk_queue_max_write_zeroes_sectors(mddev->queue, 0); blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9); @@ -4458,8 +4767,8 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, last = conf->reshape_progress - 1; sector_nr = last & ~(sector_t)(conf->geo.chunk_mask & conf->prev.chunk_mask); - if (sector_nr + RESYNC_BLOCK_SIZE/512 < last) - sector_nr = last + 1 - RESYNC_BLOCK_SIZE/512; + if (sector_nr + RESYNC_SECTORS < last) + sector_nr = last + 1 - RESYNC_SECTORS; } else { /* 'next' is after the last device address that we * might write to for this chunk in the new layout @@ -4481,8 +4790,8 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, last = sector_nr | (conf->geo.chunk_mask & conf->prev.chunk_mask); - if (sector_nr + RESYNC_BLOCK_SIZE/512 <= last) - last = sector_nr + RESYNC_BLOCK_SIZE/512 - 1; + if (sector_nr + RESYNC_SECTORS <= last) + last = sector_nr + RESYNC_SECTORS - 1; } if (need_flush || diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h index 79cd2b7d3128..1461fd55311b 100644 --- a/drivers/md/raid10.h +++ b/drivers/md/raid10.h @@ -179,5 +179,6 @@ enum r10bio_state { R10BIO_Previous, /* failfast devices did receive failfast requests. */ R10BIO_FailFast, + R10BIO_Discard, }; #endif diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index d589d26c86ea..66690b40818e 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -448,13 +448,74 @@ out: return sh; } -static void shrink_buffers(struct stripe_head *sh) +#if PAGE_SIZE != DEFAULT_STRIPE_SIZE +static void free_stripe_pages(struct stripe_head *sh) +{ + int i; + struct page *p; + + /* Have not allocate page pool */ + if (!sh->pages) + return; + + for (i = 0; i < sh->nr_pages; i++) { + p = sh->pages[i]; + if (p) + put_page(p); + sh->pages[i] = NULL; + } +} + +static int alloc_stripe_pages(struct stripe_head *sh, gfp_t gfp) { + int i; struct page *p; + + for (i = 0; i < sh->nr_pages; i++) { + /* The page have allocated. */ + if (sh->pages[i]) + continue; + + p = alloc_page(gfp); + if (!p) { + free_stripe_pages(sh); + return -ENOMEM; + } + sh->pages[i] = p; + } + return 0; +} + +static int +init_stripe_shared_pages(struct stripe_head *sh, struct r5conf *conf, int disks) +{ + int nr_pages, cnt; + + if (sh->pages) + return 0; + + /* Each of the sh->dev[i] need one conf->stripe_size */ + cnt = PAGE_SIZE / conf->stripe_size; + nr_pages = (disks + cnt - 1) / cnt; + + sh->pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL); + if (!sh->pages) + return -ENOMEM; + sh->nr_pages = nr_pages; + sh->stripes_per_page = cnt; + return 0; +} +#endif + +static void shrink_buffers(struct stripe_head *sh) +{ int i; int num = sh->raid_conf->pool_size; +#if PAGE_SIZE == DEFAULT_STRIPE_SIZE for (i = 0; i < num ; i++) { + struct page *p; + WARN_ON(sh->dev[i].page != sh->dev[i].orig_page); p = sh->dev[i].page; if (!p) @@ -462,6 +523,11 @@ static void shrink_buffers(struct stripe_head *sh) sh->dev[i].page = NULL; put_page(p); } +#else + for (i = 0; i < num; i++) + sh->dev[i].page = NULL; + free_stripe_pages(sh); /* Free pages */ +#endif } static int grow_buffers(struct stripe_head *sh, gfp_t gfp) @@ -469,6 +535,7 @@ static int grow_buffers(struct stripe_head *sh, gfp_t gfp) int i; int num = sh->raid_conf->pool_size; +#if PAGE_SIZE == DEFAULT_STRIPE_SIZE for (i = 0; i < num; i++) { struct page *page; @@ -477,8 +544,18 @@ static int grow_buffers(struct stripe_head *sh, gfp_t gfp) } sh->dev[i].page = page; sh->dev[i].orig_page = page; + sh->dev[i].offset = 0; } +#else + if (alloc_stripe_pages(sh, gfp)) + return -ENOMEM; + for (i = 0; i < num; i++) { + sh->dev[i].page = raid5_get_dev_page(sh, i); + sh->dev[i].orig_page = sh->dev[i].page; + sh->dev[i].offset = raid5_get_page_offset(sh, i); + } +#endif return 0; } @@ -1130,7 +1207,7 @@ again: sh->dev[i].vec.bv_page = sh->dev[i].page; bi->bi_vcnt = 1; bi->bi_io_vec[0].bv_len = RAID5_STRIPE_SIZE(conf); - bi->bi_io_vec[0].bv_offset = 0; + bi->bi_io_vec[0].bv_offset = sh->dev[i].offset; bi->bi_iter.bi_size = RAID5_STRIPE_SIZE(conf); bi->bi_write_hint = sh->dev[i].write_hint; if (!rrdev) @@ -1184,7 +1261,7 @@ again: sh->dev[i].rvec.bv_page = sh->dev[i].page; rbi->bi_vcnt = 1; rbi->bi_io_vec[0].bv_len = RAID5_STRIPE_SIZE(conf); - rbi->bi_io_vec[0].bv_offset = 0; + rbi->bi_io_vec[0].bv_offset = sh->dev[i].offset; rbi->bi_iter.bi_size = RAID5_STRIPE_SIZE(conf); rbi->bi_write_hint = sh->dev[i].write_hint; sh->dev[i].write_hint = RWH_WRITE_LIFE_NOT_SET; @@ -1226,7 +1303,7 @@ again: static struct dma_async_tx_descriptor * async_copy_data(int frombio, struct bio *bio, struct page **page, - sector_t sector, struct dma_async_tx_descriptor *tx, + unsigned int poff, sector_t sector, struct dma_async_tx_descriptor *tx, struct stripe_head *sh, int no_skipcopy) { struct bio_vec bvl; @@ -1272,11 +1349,11 @@ async_copy_data(int frombio, struct bio *bio, struct page **page, !no_skipcopy) *page = bio_page; else - tx = async_memcpy(*page, bio_page, page_offset, + tx = async_memcpy(*page, bio_page, page_offset + poff, b_offset, clen, &submit); } else tx = async_memcpy(bio_page, *page, b_offset, - page_offset, clen, &submit); + page_offset + poff, clen, &submit); } /* chain the operations */ submit.depend_tx = tx; @@ -1349,6 +1426,7 @@ static void ops_run_biofill(struct stripe_head *sh) while (rbi && rbi->bi_iter.bi_sector < dev->sector + RAID5_STRIPE_SECTORS(conf)) { tx = async_copy_data(0, rbi, &dev->page, + dev->offset, dev->sector, tx, sh, 0); rbi = r5_next_bio(conf, rbi, dev->sector); } @@ -1404,14 +1482,25 @@ static addr_conv_t *to_addr_conv(struct stripe_head *sh, return (void *) (to_addr_page(percpu, i) + sh->disks + 2); } +/* + * Return a pointer to record offset address. + */ +static unsigned int * +to_addr_offs(struct stripe_head *sh, struct raid5_percpu *percpu) +{ + return (unsigned int *) (to_addr_conv(sh, percpu, 0) + sh->disks + 2); +} + static struct dma_async_tx_descriptor * ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu) { int disks = sh->disks; struct page **xor_srcs = to_addr_page(percpu, 0); + unsigned int *off_srcs = to_addr_offs(sh, percpu); int target = sh->ops.target; struct r5dev *tgt = &sh->dev[target]; struct page *xor_dest = tgt->page; + unsigned int off_dest = tgt->offset; int count = 0; struct dma_async_tx_descriptor *tx; struct async_submit_ctl submit; @@ -1423,19 +1512,22 @@ ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu) __func__, (unsigned long long)sh->sector, target); BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); - for (i = disks; i--; ) - if (i != target) + for (i = disks; i--; ) { + if (i != target) { + off_srcs[count] = sh->dev[i].offset; xor_srcs[count++] = sh->dev[i].page; + } + } atomic_inc(&sh->count); init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, NULL, ops_complete_compute, sh, to_addr_conv(sh, percpu, 0)); if (unlikely(count == 1)) - tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, + tx = async_memcpy(xor_dest, xor_srcs[0], off_dest, off_srcs[0], RAID5_STRIPE_SIZE(sh->raid_conf), &submit); else - tx = async_xor(xor_dest, xor_srcs, 0, count, + tx = async_xor_offs(xor_dest, off_dest, xor_srcs, off_srcs, count, RAID5_STRIPE_SIZE(sh->raid_conf), &submit); return tx; @@ -1443,6 +1535,7 @@ ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu) /* set_syndrome_sources - populate source buffers for gen_syndrome * @srcs - (struct page *) array of size sh->disks + * @offs - (unsigned int) array of offset for each page * @sh - stripe_head to parse * * Populates srcs in proper layout order for the stripe and returns the @@ -1451,6 +1544,7 @@ ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu) * is recorded in srcs[count+1]]. */ static int set_syndrome_sources(struct page **srcs, + unsigned int *offs, struct stripe_head *sh, int srctype) { @@ -1481,6 +1575,12 @@ static int set_syndrome_sources(struct page **srcs, srcs[slot] = sh->dev[i].orig_page; else srcs[slot] = sh->dev[i].page; + /* + * For R5_InJournal, PAGE_SIZE must be 4KB and will + * not shared page. In that case, dev[i].offset + * is 0. + */ + offs[slot] = sh->dev[i].offset; } i = raid6_next_disk(i, disks); } while (i != d0_idx); @@ -1493,12 +1593,14 @@ ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu) { int disks = sh->disks; struct page **blocks = to_addr_page(percpu, 0); + unsigned int *offs = to_addr_offs(sh, percpu); int target; int qd_idx = sh->qd_idx; struct dma_async_tx_descriptor *tx; struct async_submit_ctl submit; struct r5dev *tgt; struct page *dest; + unsigned int dest_off; int i; int count; @@ -1517,17 +1619,18 @@ ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu) tgt = &sh->dev[target]; BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); dest = tgt->page; + dest_off = tgt->offset; atomic_inc(&sh->count); if (target == qd_idx) { - count = set_syndrome_sources(blocks, sh, SYNDROME_SRC_ALL); + count = set_syndrome_sources(blocks, offs, sh, SYNDROME_SRC_ALL); blocks[count] = NULL; /* regenerating p is not necessary */ BUG_ON(blocks[count+1] != dest); /* q should already be set */ init_async_submit(&submit, ASYNC_TX_FENCE, NULL, ops_complete_compute, sh, to_addr_conv(sh, percpu, 0)); - tx = async_gen_syndrome(blocks, 0, count+2, + tx = async_gen_syndrome(blocks, offs, count+2, RAID5_STRIPE_SIZE(sh->raid_conf), &submit); } else { /* Compute any data- or p-drive using XOR */ @@ -1535,13 +1638,14 @@ ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu) for (i = disks; i-- ; ) { if (i == target || i == qd_idx) continue; + offs[count] = sh->dev[i].offset; blocks[count++] = sh->dev[i].page; } init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, NULL, ops_complete_compute, sh, to_addr_conv(sh, percpu, 0)); - tx = async_xor(dest, blocks, 0, count, + tx = async_xor_offs(dest, dest_off, blocks, offs, count, RAID5_STRIPE_SIZE(sh->raid_conf), &submit); } @@ -1561,6 +1665,7 @@ ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu) struct r5dev *tgt2 = &sh->dev[target2]; struct dma_async_tx_descriptor *tx; struct page **blocks = to_addr_page(percpu, 0); + unsigned int *offs = to_addr_offs(sh, percpu); struct async_submit_ctl submit; BUG_ON(sh->batch_head); @@ -1573,13 +1678,16 @@ ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu) /* we need to open-code set_syndrome_sources to handle the * slot number conversion for 'faila' and 'failb' */ - for (i = 0; i < disks ; i++) + for (i = 0; i < disks ; i++) { + offs[i] = 0; blocks[i] = NULL; + } count = 0; i = d0_idx; do { int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks); + offs[slot] = sh->dev[i].offset; blocks[slot] = sh->dev[i].page; if (i == target) @@ -1604,11 +1712,12 @@ ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu) init_async_submit(&submit, ASYNC_TX_FENCE, NULL, ops_complete_compute, sh, to_addr_conv(sh, percpu, 0)); - return async_gen_syndrome(blocks, 0, syndrome_disks+2, + return async_gen_syndrome(blocks, offs, syndrome_disks+2, RAID5_STRIPE_SIZE(sh->raid_conf), &submit); } else { struct page *dest; + unsigned int dest_off; int data_target; int qd_idx = sh->qd_idx; @@ -1622,22 +1731,24 @@ ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu) for (i = disks; i-- ; ) { if (i == data_target || i == qd_idx) continue; + offs[count] = sh->dev[i].offset; blocks[count++] = sh->dev[i].page; } dest = sh->dev[data_target].page; + dest_off = sh->dev[data_target].offset; init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, NULL, NULL, NULL, to_addr_conv(sh, percpu, 0)); - tx = async_xor(dest, blocks, 0, count, + tx = async_xor_offs(dest, dest_off, blocks, offs, count, RAID5_STRIPE_SIZE(sh->raid_conf), &submit); - count = set_syndrome_sources(blocks, sh, SYNDROME_SRC_ALL); + count = set_syndrome_sources(blocks, offs, sh, SYNDROME_SRC_ALL); init_async_submit(&submit, ASYNC_TX_FENCE, tx, ops_complete_compute, sh, to_addr_conv(sh, percpu, 0)); - return async_gen_syndrome(blocks, 0, count+2, + return async_gen_syndrome(blocks, offs, count+2, RAID5_STRIPE_SIZE(sh->raid_conf), &submit); } @@ -1650,13 +1761,13 @@ ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu) return async_raid6_datap_recov(syndrome_disks+2, RAID5_STRIPE_SIZE(sh->raid_conf), faila, - blocks, &submit); + blocks, offs, &submit); } else { /* We're missing D+D. */ return async_raid6_2data_recov(syndrome_disks+2, RAID5_STRIPE_SIZE(sh->raid_conf), faila, failb, - blocks, &submit); + blocks, offs, &submit); } } } @@ -1682,10 +1793,12 @@ ops_run_prexor5(struct stripe_head *sh, struct raid5_percpu *percpu, { int disks = sh->disks; struct page **xor_srcs = to_addr_page(percpu, 0); + unsigned int *off_srcs = to_addr_offs(sh, percpu); int count = 0, pd_idx = sh->pd_idx, i; struct async_submit_ctl submit; /* existing parity data subtracted */ + unsigned int off_dest = off_srcs[count] = sh->dev[pd_idx].offset; struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; BUG_ON(sh->batch_head); @@ -1695,15 +1808,22 @@ ops_run_prexor5(struct stripe_head *sh, struct raid5_percpu *percpu, for (i = disks; i--; ) { struct r5dev *dev = &sh->dev[i]; /* Only process blocks that are known to be uptodate */ - if (test_bit(R5_InJournal, &dev->flags)) + if (test_bit(R5_InJournal, &dev->flags)) { + /* + * For this case, PAGE_SIZE must be equal to 4KB and + * page offset is zero. + */ + off_srcs[count] = dev->offset; xor_srcs[count++] = dev->orig_page; - else if (test_bit(R5_Wantdrain, &dev->flags)) + } else if (test_bit(R5_Wantdrain, &dev->flags)) { + off_srcs[count] = dev->offset; xor_srcs[count++] = dev->page; + } } init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, ops_complete_prexor, sh, to_addr_conv(sh, percpu, 0)); - tx = async_xor(xor_dest, xor_srcs, 0, count, + tx = async_xor_offs(xor_dest, off_dest, xor_srcs, off_srcs, count, RAID5_STRIPE_SIZE(sh->raid_conf), &submit); return tx; @@ -1714,17 +1834,18 @@ ops_run_prexor6(struct stripe_head *sh, struct raid5_percpu *percpu, struct dma_async_tx_descriptor *tx) { struct page **blocks = to_addr_page(percpu, 0); + unsigned int *offs = to_addr_offs(sh, percpu); int count; struct async_submit_ctl submit; pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector); - count = set_syndrome_sources(blocks, sh, SYNDROME_SRC_WANT_DRAIN); + count = set_syndrome_sources(blocks, offs, sh, SYNDROME_SRC_WANT_DRAIN); init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_PQ_XOR_DST, tx, ops_complete_prexor, sh, to_addr_conv(sh, percpu, 0)); - tx = async_gen_syndrome(blocks, 0, count+2, + tx = async_gen_syndrome(blocks, offs, count+2, RAID5_STRIPE_SIZE(sh->raid_conf), &submit); return tx; @@ -1775,6 +1896,7 @@ again: set_bit(R5_Discard, &dev->flags); else { tx = async_copy_data(1, wbi, &dev->page, + dev->offset, dev->sector, tx, sh, r5c_is_writeback(conf->log)); if (dev->page != dev->orig_page && @@ -1854,9 +1976,11 @@ ops_run_reconstruct5(struct stripe_head *sh, struct raid5_percpu *percpu, { int disks = sh->disks; struct page **xor_srcs; + unsigned int *off_srcs; struct async_submit_ctl submit; int count, pd_idx = sh->pd_idx, i; struct page *xor_dest; + unsigned int off_dest; int prexor = 0; unsigned long flags; int j = 0; @@ -1881,24 +2005,31 @@ ops_run_reconstruct5(struct stripe_head *sh, struct raid5_percpu *percpu, again: count = 0; xor_srcs = to_addr_page(percpu, j); + off_srcs = to_addr_offs(sh, percpu); /* check if prexor is active which means only process blocks * that are part of a read-modify-write (written) */ if (head_sh->reconstruct_state == reconstruct_state_prexor_drain_run) { prexor = 1; + off_dest = off_srcs[count] = sh->dev[pd_idx].offset; xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; for (i = disks; i--; ) { struct r5dev *dev = &sh->dev[i]; if (head_sh->dev[i].written || - test_bit(R5_InJournal, &head_sh->dev[i].flags)) + test_bit(R5_InJournal, &head_sh->dev[i].flags)) { + off_srcs[count] = dev->offset; xor_srcs[count++] = dev->page; + } } } else { xor_dest = sh->dev[pd_idx].page; + off_dest = sh->dev[pd_idx].offset; for (i = disks; i--; ) { struct r5dev *dev = &sh->dev[i]; - if (i != pd_idx) + if (i != pd_idx) { + off_srcs[count] = dev->offset; xor_srcs[count++] = dev->page; + } } } @@ -1924,10 +2055,10 @@ again: } if (unlikely(count == 1)) - tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, + tx = async_memcpy(xor_dest, xor_srcs[0], off_dest, off_srcs[0], RAID5_STRIPE_SIZE(sh->raid_conf), &submit); else - tx = async_xor(xor_dest, xor_srcs, 0, count, + tx = async_xor_offs(xor_dest, off_dest, xor_srcs, off_srcs, count, RAID5_STRIPE_SIZE(sh->raid_conf), &submit); if (!last_stripe) { j++; @@ -1943,6 +2074,7 @@ ops_run_reconstruct6(struct stripe_head *sh, struct raid5_percpu *percpu, { struct async_submit_ctl submit; struct page **blocks; + unsigned int *offs; int count, i, j = 0; struct stripe_head *head_sh = sh; int last_stripe; @@ -1967,6 +2099,7 @@ ops_run_reconstruct6(struct stripe_head *sh, struct raid5_percpu *percpu, again: blocks = to_addr_page(percpu, j); + offs = to_addr_offs(sh, percpu); if (sh->reconstruct_state == reconstruct_state_prexor_drain_run) { synflags = SYNDROME_SRC_WRITTEN; @@ -1976,7 +2109,7 @@ again: txflags = ASYNC_TX_ACK; } - count = set_syndrome_sources(blocks, sh, synflags); + count = set_syndrome_sources(blocks, offs, sh, synflags); last_stripe = !head_sh->batch_head || list_first_entry(&sh->batch_list, struct stripe_head, batch_list) == head_sh; @@ -1988,7 +2121,7 @@ again: } else init_async_submit(&submit, 0, tx, NULL, NULL, to_addr_conv(sh, percpu, j)); - tx = async_gen_syndrome(blocks, 0, count+2, + tx = async_gen_syndrome(blocks, offs, count+2, RAID5_STRIPE_SIZE(sh->raid_conf), &submit); if (!last_stripe) { j++; @@ -2016,7 +2149,9 @@ static void ops_run_check_p(struct stripe_head *sh, struct raid5_percpu *percpu) int pd_idx = sh->pd_idx; int qd_idx = sh->qd_idx; struct page *xor_dest; + unsigned int off_dest; struct page **xor_srcs = to_addr_page(percpu, 0); + unsigned int *off_srcs = to_addr_offs(sh, percpu); struct dma_async_tx_descriptor *tx; struct async_submit_ctl submit; int count; @@ -2028,16 +2163,19 @@ static void ops_run_check_p(struct stripe_head *sh, struct raid5_percpu *percpu) BUG_ON(sh->batch_head); count = 0; xor_dest = sh->dev[pd_idx].page; + off_dest = sh->dev[pd_idx].offset; + off_srcs[count] = off_dest; xor_srcs[count++] = xor_dest; for (i = disks; i--; ) { if (i == pd_idx || i == qd_idx) continue; + off_srcs[count] = sh->dev[i].offset; xor_srcs[count++] = sh->dev[i].page; } init_async_submit(&submit, 0, NULL, NULL, NULL, to_addr_conv(sh, percpu, 0)); - tx = async_xor_val(xor_dest, xor_srcs, 0, count, + tx = async_xor_val_offs(xor_dest, off_dest, xor_srcs, off_srcs, count, RAID5_STRIPE_SIZE(sh->raid_conf), &sh->ops.zero_sum_result, &submit); @@ -2049,6 +2187,7 @@ static void ops_run_check_p(struct stripe_head *sh, struct raid5_percpu *percpu) static void ops_run_check_pq(struct stripe_head *sh, struct raid5_percpu *percpu, int checkp) { struct page **srcs = to_addr_page(percpu, 0); + unsigned int *offs = to_addr_offs(sh, percpu); struct async_submit_ctl submit; int count; @@ -2056,16 +2195,16 @@ static void ops_run_check_pq(struct stripe_head *sh, struct raid5_percpu *percpu (unsigned long long)sh->sector, checkp); BUG_ON(sh->batch_head); - count = set_syndrome_sources(srcs, sh, SYNDROME_SRC_ALL); + count = set_syndrome_sources(srcs, offs, sh, SYNDROME_SRC_ALL); if (!checkp) srcs[count] = NULL; atomic_inc(&sh->count); init_async_submit(&submit, ASYNC_TX_ACK, NULL, ops_complete_check, sh, to_addr_conv(sh, percpu, 0)); - async_syndrome_val(srcs, 0, count+2, + async_syndrome_val(srcs, offs, count+2, RAID5_STRIPE_SIZE(sh->raid_conf), - &sh->ops.zero_sum_result, percpu->spare_page, &submit); + &sh->ops.zero_sum_result, percpu->spare_page, 0, &submit); } static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) @@ -2142,6 +2281,9 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) static void free_stripe(struct kmem_cache *sc, struct stripe_head *sh) { +#if PAGE_SIZE != DEFAULT_STRIPE_SIZE + kfree(sh->pages); +#endif if (sh->ppl_page) __free_page(sh->ppl_page); kmem_cache_free(sc, sh); @@ -2175,9 +2317,15 @@ static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp, sh->ppl_page = alloc_page(gfp); if (!sh->ppl_page) { free_stripe(sc, sh); - sh = NULL; + return NULL; } } +#if PAGE_SIZE != DEFAULT_STRIPE_SIZE + if (init_stripe_shared_pages(sh, conf, disks)) { + free_stripe(sc, sh); + return NULL; + } +#endif } return sh; } @@ -2253,8 +2401,9 @@ static int scribble_alloc(struct raid5_percpu *percpu, int num, int cnt) { size_t obj_size = - sizeof(struct page *) * (num+2) + - sizeof(addr_conv_t) * (num+2); + sizeof(struct page *) * (num + 2) + + sizeof(addr_conv_t) * (num + 2) + + sizeof(unsigned int) * (num + 2); void *scribble; /* @@ -2386,9 +2535,16 @@ static int resize_stripes(struct r5conf *conf, int newsize) osh = get_free_stripe(conf, hash); unlock_device_hash_lock(conf, hash); +#if PAGE_SIZE != DEFAULT_STRIPE_SIZE + for (i = 0; i < osh->nr_pages; i++) { + nsh->pages[i] = osh->pages[i]; + osh->pages[i] = NULL; + } +#endif for(i=0; i<conf->pool_size; i++) { nsh->dev[i].page = osh->dev[i].page; nsh->dev[i].orig_page = osh->dev[i].page; + nsh->dev[i].offset = osh->dev[i].offset; } nsh->hash_lock_index = hash; free_stripe(conf->slab_cache, osh); @@ -2439,14 +2595,33 @@ static int resize_stripes(struct r5conf *conf, int newsize) nsh = list_entry(newstripes.next, struct stripe_head, lru); list_del_init(&nsh->lru); +#if PAGE_SIZE != DEFAULT_STRIPE_SIZE + for (i = 0; i < nsh->nr_pages; i++) { + if (nsh->pages[i]) + continue; + nsh->pages[i] = alloc_page(GFP_NOIO); + if (!nsh->pages[i]) + err = -ENOMEM; + } + + for (i = conf->raid_disks; i < newsize; i++) { + if (nsh->dev[i].page) + continue; + nsh->dev[i].page = raid5_get_dev_page(nsh, i); + nsh->dev[i].orig_page = nsh->dev[i].page; + nsh->dev[i].offset = raid5_get_page_offset(nsh, i); + } +#else for (i=conf->raid_disks; i < newsize; i++) if (nsh->dev[i].page == NULL) { struct page *p = alloc_page(GFP_NOIO); nsh->dev[i].page = p; nsh->dev[i].orig_page = p; + nsh->dev[i].offset = 0; if (!p) err = -ENOMEM; } +#endif raid5_release_stripe(nsh); } /* critical section pass, GFP_NOIO no longer needed */ @@ -4369,7 +4544,8 @@ static void handle_stripe_expansion(struct r5conf *conf, struct stripe_head *sh) /* place all the copies on one channel */ init_async_submit(&submit, 0, tx, NULL, NULL, NULL); tx = async_memcpy(sh2->dev[dd_idx].page, - sh->dev[i].page, 0, 0, RAID5_STRIPE_SIZE(conf), + sh->dev[i].page, sh2->dev[dd_idx].offset, + sh->dev[i].offset, RAID5_STRIPE_SIZE(conf), &submit); set_bit(R5_Expanded, &sh2->dev[dd_idx].flags); @@ -6506,6 +6682,7 @@ raid5_store_stripe_size(struct mddev *mddev, const char *page, size_t len) struct r5conf *conf; unsigned long new; int err; + int size; if (len >= PAGE_SIZE) return -EINVAL; @@ -6538,10 +6715,29 @@ raid5_store_stripe_size(struct mddev *mddev, const char *page, size_t len) pr_debug("md/raid: change stripe_size from %lu to %lu\n", conf->stripe_size, new); + if (mddev->sync_thread || + test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || + mddev->reshape_position != MaxSector || + mddev->sysfs_active) { + err = -EBUSY; + goto out_unlock; + } + mddev_suspend(mddev); + mutex_lock(&conf->cache_size_mutex); + size = conf->max_nr_stripes; + + shrink_stripes(conf); + conf->stripe_size = new; conf->stripe_shift = ilog2(new) - 9; conf->stripe_sectors = new >> 9; + if (grow_stripes(conf, size)) { + pr_warn("md/raid:%s: couldn't allocate buffers\n", + mdname(mddev)); + err = -ENOMEM; + } + mutex_unlock(&conf->cache_size_mutex); mddev_resume(mddev); out_unlock: diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 16fc29472f5c..5c05acf20e1f 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -195,6 +195,7 @@ enum reconstruct_states { reconstruct_state_result, }; +#define DEFAULT_STRIPE_SIZE 4096 struct stripe_head { struct hlist_node hash; struct list_head lru; /* inactive_list or handle_list */ @@ -246,6 +247,13 @@ struct stripe_head { int target, target2; enum sum_check_flags zero_sum_result; } ops; + +#if PAGE_SIZE != DEFAULT_STRIPE_SIZE + /* These pages will be used by bios in dev[i] */ + struct page **pages; + int nr_pages; /* page array size */ + int stripes_per_page; +#endif struct r5dev { /* rreq and rvec are used for the replacement device when * writing data to both devices. @@ -253,6 +261,7 @@ struct stripe_head { struct bio req, rreq; struct bio_vec vec, rvec; struct page *page, *orig_page; + unsigned int offset; /* offset of the page */ struct bio *toread, *read, *towrite, *written; sector_t sector; /* sector of this page */ unsigned long flags; @@ -472,7 +481,6 @@ struct disk_info { */ #define NR_STRIPES 256 -#define DEFAULT_STRIPE_SIZE 4096 #if PAGE_SIZE == DEFAULT_STRIPE_SIZE #define STRIPE_SIZE PAGE_SIZE @@ -771,6 +779,25 @@ static inline int algorithm_is_DDF(int layout) return layout >= 8 && layout <= 10; } +#if PAGE_SIZE != DEFAULT_STRIPE_SIZE +/* + * Return offset of the corresponding page for r5dev. + */ +static inline int raid5_get_page_offset(struct stripe_head *sh, int disk_idx) +{ + return (disk_idx % sh->stripes_per_page) * RAID5_STRIPE_SIZE(sh->raid_conf); +} + +/* + * Return corresponding page address for r5dev. + */ +static inline struct page * +raid5_get_dev_page(struct stripe_head *sh, int disk_idx) +{ + return sh->pages[disk_idx / sh->stripes_per_page]; +} +#endif + extern void md_raid5_kick_device(struct r5conf *conf); extern int raid5_set_cache_size(struct mddev *mddev, int size); extern sector_t raid5_compute_blocknr(struct stripe_head *sh, int i, int previous); diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 4c328fef403c..5cc73d7e5b52 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -163,11 +163,22 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset, int src_cnt, size_t len, struct async_submit_ctl *submit); struct dma_async_tx_descriptor * +async_xor_offs(struct page *dest, unsigned int offset, + struct page **src_list, unsigned int *src_offset, + int src_cnt, size_t len, struct async_submit_ctl *submit); + +struct dma_async_tx_descriptor * async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, int src_cnt, size_t len, enum sum_check_flags *result, struct async_submit_ctl *submit); struct dma_async_tx_descriptor * +async_xor_val_offs(struct page *dest, unsigned int offset, + struct page **src_list, unsigned int *src_offset, + int src_cnt, size_t len, enum sum_check_flags *result, + struct async_submit_ctl *submit); + +struct dma_async_tx_descriptor * async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, unsigned int src_offset, size_t len, struct async_submit_ctl *submit); @@ -175,21 +186,23 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, struct dma_async_tx_descriptor *async_trigger_callback(struct async_submit_ctl *submit); struct dma_async_tx_descriptor * -async_gen_syndrome(struct page **blocks, unsigned int offset, int src_cnt, +async_gen_syndrome(struct page **blocks, unsigned int *offsets, int src_cnt, size_t len, struct async_submit_ctl *submit); struct dma_async_tx_descriptor * -async_syndrome_val(struct page **blocks, unsigned int offset, int src_cnt, +async_syndrome_val(struct page **blocks, unsigned int *offsets, int src_cnt, size_t len, enum sum_check_flags *pqres, struct page *spare, - struct async_submit_ctl *submit); + unsigned int s_off, struct async_submit_ctl *submit); struct dma_async_tx_descriptor * async_raid6_2data_recov(int src_num, size_t bytes, int faila, int failb, - struct page **ptrs, struct async_submit_ctl *submit); + struct page **ptrs, unsigned int *offs, + struct async_submit_ctl *submit); struct dma_async_tx_descriptor * async_raid6_datap_recov(int src_num, size_t bytes, int faila, - struct page **ptrs, struct async_submit_ctl *submit); + struct page **ptrs, unsigned int *offs, + struct async_submit_ctl *submit); void async_tx_quiesce(struct dma_async_tx_descriptor **tx); #endif /* _ASYNC_TX_H_ */ |