author     Markus Stockhausen <stockhausen@collogia.de>    2014-12-14 20:57:05 -0500
committer  NeilBrown <neilb@suse.de>                       2015-04-21 18:00:42 -0400
commit     584acdd49cd2472ca0f5a06adbe979db82d0b4af
tree       94abdc5ca0208e47275bc2a8ad82c2d25cefddfd
parent     a582564b24bec0443b5c5ff43ee6d1258f8bd658
md/raid5: activate raid6 rmw feature
Glue it all together. The raid6 rmw path should work the same as the
already existing raid5 logic. So emulate the prexor handling/flags and
split functions as needed.

1) Enable xor_syndrome() in the async layer.

2) Split ops_run_prexor() into RAID4/5 and RAID6 logic. Xor the syndrome
   at the start of a rmw run as we did it before for the single parity.

3) Take care of the rmw run in ops_run_reconstruct6(). Again process only
   the changed pages to get the syndrome back into sync.

4) Enhance set_syndrome_sources() to fill NULL pages if we are in a rmw
   run. The lower layers will calculate start & end pages from that and
   call xor_syndrome() accordingly.

5) Adapt the several places where we ignored Q handling up to now.

Performance numbers for a single E5630 system with a mix of 10 7200 rpm
desktop/server disks. 300 seconds of random writes with 8 threads onto a
3.2TB (10*400GB) RAID6 with 64K chunk size and no spare
(group_thread_cnt=4):

bsize   rmw_level=1   rmw_level=0   rmw_level=1   rmw_level=0
        skip_copy=1   skip_copy=1   skip_copy=0   skip_copy=0
   4K      115 KB/s      141 KB/s      165 KB/s      140 KB/s
   8K      225 KB/s      275 KB/s      324 KB/s      274 KB/s
  16K      434 KB/s      536 KB/s      640 KB/s      534 KB/s
  32K      751 KB/s    1,051 KB/s    1,234 KB/s    1,045 KB/s
  64K    1,339 KB/s    1,958 KB/s    2,282 KB/s    1,962 KB/s
 128K    2,673 KB/s    3,862 KB/s    4,113 KB/s    3,898 KB/s
 256K    7,685 KB/s    7,539 KB/s    7,557 KB/s    7,638 KB/s
 512K   19,556 KB/s   19,558 KB/s   19,652 KB/s   19,688 KB/s

Signed-off-by: Markus Stockhausen <stockhausen@collogia.de>
Signed-off-by: NeilBrown <neilb@suse.de>
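The prexor/reconstruct split above works because both RAID6 parities can be
updated from the write delta alone. P is plain XOR parity, and Q weights each
data block D_d with g^d in GF(2^8), so for a small write it is enough to XOR
the old contents out of P/Q over the changed range (the prexor pass, fed by
SYNDROME_SRC_WANT_DRAIN) and XOR the new contents back in (the reconstruct
pass, fed by SYNDROME_SRC_WRITTEN), instead of reading every data block and
regenerating the full syndrome. The sketch below only illustrates that algebra
under the usual RAID6 conventions (generator g = 2, reduction polynomial
0x11d); the helpers raid6_rmw_update() and gf_mul_gpow() are invented for the
example and are not the kernel's code, whose optimized per-architecture
xor_syndrome() routines sit behind raid6_call.

#include <stdint.h>
#include <stddef.h>

/* Multiply by the RAID6 generator g = 2 in GF(2^8), polynomial 0x11d. */
static uint8_t gf_mul2(uint8_t a)
{
    return (uint8_t)((a << 1) ^ ((a & 0x80) ? 0x1d : 0x00));
}

/* Multiply by g^power via repeated doubling (illustrative, not optimized). */
static uint8_t gf_mul_gpow(uint8_t a, int power)
{
    while (power-- > 0)
        a = gf_mul2(a);
    return a;
}

/*
 * Hypothetical helper: fold one changed data block into existing P/Q.
 * With delta = old ^ new:  P' = P ^ delta,  Q' = Q ^ g^disk * delta.
 * The kernel performs the same update in two passes over the stripe:
 * prexor (XOR the old pages into P/Q) and reconstruct (XOR the drained
 * new pages), which is why the syndrome must be XORed into, not
 * overwritten.
 */
static void raid6_rmw_update(uint8_t *p, uint8_t *q,
                             const uint8_t *old_data, const uint8_t *new_data,
                             int disk, size_t len)
{
    size_t i;

    for (i = 0; i < len; i++) {
        uint8_t delta = old_data[i] ^ new_data[i];

        p[i] ^= delta;                    /* P: plain XOR parity         */
        q[i] ^= gf_mul_gpow(delta, disk); /* Q: delta weighted by g^disk */
    }
}

Byte-wise doubling is of course far slower than the table- and SIMD-based code
in lib/raid6; the point is only to show why a rmw needs an XOR-into-destination
mode (ASYNC_TX_PQ_XOR_DST) rather than a gen_syndrome() that overwrites P and Q.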
-rw-r--r--  crypto/async_tx/async_pq.c  |  19
-rw-r--r--  drivers/md/raid5.c          | 104
-rw-r--r--  drivers/md/raid5.h          |  19
-rw-r--r--  include/linux/async_tx.h    |   3
4 files changed, 115 insertions(+), 30 deletions(-)
diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c
index d05327caf69d..5d355e0c2633 100644
--- a/crypto/async_tx/async_pq.c
+++ b/crypto/async_tx/async_pq.c
@@ -124,6 +124,7 @@ do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
 {
     void **srcs;
     int i;
+    int start = -1, stop = disks - 3;
 
     if (submit->scribble)
         srcs = submit->scribble;
@@ -134,10 +135,21 @@ do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
         if (blocks[i] == NULL) {
             BUG_ON(i > disks - 3); /* P or Q can't be zero */
             srcs[i] = (void*)raid6_empty_zero_page;
-        } else
+        } else {
             srcs[i] = page_address(blocks[i]) + offset;
+            if (i < disks - 2) {
+                stop = i;
+                if (start == -1)
+                    start = i;
+            }
+        }
     }
-    raid6_call.gen_syndrome(disks, len, srcs);
+    if (submit->flags & ASYNC_TX_PQ_XOR_DST) {
+        BUG_ON(!raid6_call.xor_syndrome);
+        if (start >= 0)
+            raid6_call.xor_syndrome(disks, start, stop, len, srcs);
+    } else
+        raid6_call.gen_syndrome(disks, len, srcs);
     async_tx_sync_epilog(submit);
 }
 
@@ -178,7 +190,8 @@ async_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
     if (device)
         unmap = dmaengine_get_unmap_data(device->dev, disks, GFP_NOIO);
 
-    if (unmap &&
+    /* XORing P/Q is only implemented in software */
+    if (unmap && !(submit->flags & ASYNC_TX_PQ_XOR_DST) &&
         (src_cnt <= dma_maxpq(device, 0) ||
          dma_maxpq(device, DMA_PREP_CONTINUE) > 0) &&
         is_dma_pq_aligned(device, offset, 0, len)) {
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 3ae097d50b51..c82ce1fd8723 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1317,7 +1317,9 @@ ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu)
  * destination buffer is recorded in srcs[count] and the Q destination
  * is recorded in srcs[count+1]].
  */
-static int set_syndrome_sources(struct page **srcs, struct stripe_head *sh)
+static int set_syndrome_sources(struct page **srcs,
+                                struct stripe_head *sh,
+                                int srctype)
 {
     int disks = sh->disks;
     int syndrome_disks = sh->ddf_layout ? disks : (disks - 2);
@@ -1332,8 +1334,15 @@ static int set_syndrome_sources(struct page **srcs, struct stripe_head *sh)
     i = d0_idx;
     do {
         int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
+        struct r5dev *dev = &sh->dev[i];
 
-        srcs[slot] = sh->dev[i].page;
+        if (i == sh->qd_idx || i == sh->pd_idx ||
+            (srctype == SYNDROME_SRC_ALL) ||
+            (srctype == SYNDROME_SRC_WANT_DRAIN &&
+             test_bit(R5_Wantdrain, &dev->flags)) ||
+            (srctype == SYNDROME_SRC_WRITTEN &&
+             dev->written))
+            srcs[slot] = sh->dev[i].page;
         i = raid6_next_disk(i, disks);
     } while (i != d0_idx);
 
@@ -1373,7 +1382,7 @@ ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu)
     atomic_inc(&sh->count);
 
     if (target == qd_idx) {
-        count = set_syndrome_sources(blocks, sh);
+        count = set_syndrome_sources(blocks, sh, SYNDROME_SRC_ALL);
         blocks[count] = NULL; /* regenerating p is not necessary */
         BUG_ON(blocks[count+1] != dest); /* q should already be set */
         init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
@@ -1481,7 +1490,7 @@ ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu)
             tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE,
                            &submit);
 
-            count = set_syndrome_sources(blocks, sh);
+            count = set_syndrome_sources(blocks, sh, SYNDROME_SRC_ALL);
             init_async_submit(&submit, ASYNC_TX_FENCE, tx,
                               ops_complete_compute, sh,
                               to_addr_conv(sh, percpu, 0));
@@ -1515,8 +1524,8 @@ static void ops_complete_prexor(void *stripe_head_ref)
 }
 
 static struct dma_async_tx_descriptor *
-ops_run_prexor(struct stripe_head *sh, struct raid5_percpu *percpu,
-               struct dma_async_tx_descriptor *tx)
+ops_run_prexor5(struct stripe_head *sh, struct raid5_percpu *percpu,
+                struct dma_async_tx_descriptor *tx)
 {
     int disks = sh->disks;
     struct page **xor_srcs = to_addr_page(percpu, 0);
@@ -1545,6 +1554,26 @@ ops_run_prexor(struct stripe_head *sh, struct raid5_percpu *percpu,
 }
 
 static struct dma_async_tx_descriptor *
+ops_run_prexor6(struct stripe_head *sh, struct raid5_percpu *percpu,
+                struct dma_async_tx_descriptor *tx)
+{
+    struct page **blocks = to_addr_page(percpu, 0);
+    int count;
+    struct async_submit_ctl submit;
+
+    pr_debug("%s: stripe %llu\n", __func__,
+             (unsigned long long)sh->sector);
+
+    count = set_syndrome_sources(blocks, sh, SYNDROME_SRC_WANT_DRAIN);
+
+    init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_PQ_XOR_DST, tx,
+                      ops_complete_prexor, sh, to_addr_conv(sh, percpu, 0));
+    tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit);
+
+    return tx;
+}
+
+static struct dma_async_tx_descriptor *
 ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
 {
     int disks = sh->disks;
@@ -1746,6 +1775,8 @@ ops_run_reconstruct6(struct stripe_head *sh, struct raid5_percpu *percpu,
     int count, i, j = 0;
     struct stripe_head *head_sh = sh;
     int last_stripe;
+    int synflags;
+    unsigned long txflags;
 
     pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector);
 
@@ -1765,14 +1796,23 @@ ops_run_reconstruct6(struct stripe_head *sh, struct raid5_percpu *percpu,
 
 again:
     blocks = to_addr_page(percpu, j);
-    count = set_syndrome_sources(blocks, sh);
+
+    if (sh->reconstruct_state == reconstruct_state_prexor_drain_run) {
+        synflags = SYNDROME_SRC_WRITTEN;
+        txflags = ASYNC_TX_ACK | ASYNC_TX_PQ_XOR_DST;
+    } else {
+        synflags = SYNDROME_SRC_ALL;
+        txflags = ASYNC_TX_ACK;
+    }
+
+    count = set_syndrome_sources(blocks, sh, synflags);
     last_stripe = !head_sh->batch_head ||
         list_first_entry(&sh->batch_list,
                          struct stripe_head, batch_list) == head_sh;
 
     if (last_stripe) {
         atomic_inc(&head_sh->count);
-        init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_reconstruct,
+        init_async_submit(&submit, txflags, tx, ops_complete_reconstruct,
                           head_sh, to_addr_conv(sh, percpu, j));
     } else
         init_async_submit(&submit, 0, tx, NULL, NULL,
@@ -1843,7 +1883,7 @@ static void ops_run_check_pq(struct stripe_head *sh, struct raid5_percpu *percpu
              (unsigned long long)sh->sector, checkp);
 
     BUG_ON(sh->batch_head);
-    count = set_syndrome_sources(srcs, sh);
+    count = set_syndrome_sources(srcs, sh, SYNDROME_SRC_ALL);
     if (!checkp)
         srcs[count] = NULL;
 
@@ -1884,8 +1924,12 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
             async_tx_ack(tx);
     }
 
-    if (test_bit(STRIPE_OP_PREXOR, &ops_request))
-        tx = ops_run_prexor(sh, percpu, tx);
+    if (test_bit(STRIPE_OP_PREXOR, &ops_request)) {
+        if (level < 6)
+            tx = ops_run_prexor5(sh, percpu, tx);
+        else
+            tx = ops_run_prexor6(sh, percpu, tx);
+    }
 
     if (test_bit(STRIPE_OP_BIODRAIN, &ops_request)) {
         tx = ops_run_biodrain(sh, tx);
@@ -2770,7 +2814,7 @@ static void
 schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
                         int rcw, int expand)
 {
-    int i, pd_idx = sh->pd_idx, disks = sh->disks;
+    int i, pd_idx = sh->pd_idx, qd_idx = sh->qd_idx, disks = sh->disks;
     struct r5conf *conf = sh->raid_conf;
     int level = conf->level;
 
@@ -2806,13 +2850,15 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
         if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
             atomic_inc(&conf->pending_full_writes);
     } else {
-        BUG_ON(level == 6);
         BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) ||
                  test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags)));
+        BUG_ON(level == 6 &&
+               (!(test_bit(R5_UPTODATE, &sh->dev[qd_idx].flags) ||
+                  test_bit(R5_Wantcompute, &sh->dev[qd_idx].flags))));
 
         for (i = disks; i--; ) {
             struct r5dev *dev = &sh->dev[i];
-            if (i == pd_idx)
+            if (i == pd_idx || i == qd_idx)
                 continue;
 
             if (dev->towrite &&
@@ -3476,28 +3522,27 @@ static void handle_stripe_dirtying(struct r5conf *conf,
     int rmw = 0, rcw = 0, i;
     sector_t recovery_cp = conf->mddev->recovery_cp;
 
-    /* RAID6 requires 'rcw' in current implementation.
-     * Otherwise, check whether resync is now happening or should start.
+    /* Check whether resync is now happening or should start.
      * If yes, then the array is dirty (after unclean shutdown or
      * initial creation), so parity in some stripes might be inconsistent.
      * In this case, we need to always do reconstruct-write, to ensure
      * that in case of drive failure or read-error correction, we
      * generate correct data from the parity.
      */
-    if (conf->max_degraded == 2 ||
+    if (conf->rmw_level == PARITY_DISABLE_RMW ||
         (recovery_cp < MaxSector && sh->sector >= recovery_cp &&
          s->failed == 0)) {
         /* Calculate the real rcw later - for now make it
          * look like rcw is cheaper
          */
         rcw = 1; rmw = 2;
-        pr_debug("force RCW max_degraded=%u, recovery_cp=%llu sh->sector=%llu\n",
-                 conf->max_degraded, (unsigned long long)recovery_cp,
+        pr_debug("force RCW rmw_level=%u, recovery_cp=%llu sh->sector=%llu\n",
+                 conf->rmw_level, (unsigned long long)recovery_cp,
                  (unsigned long long)sh->sector);
     } else for (i = disks; i--; ) {
         /* would I have to read this buffer for read_modify_write */
         struct r5dev *dev = &sh->dev[i];
-        if ((dev->towrite || i == sh->pd_idx) &&
+        if ((dev->towrite || i == sh->pd_idx || i == sh->qd_idx) &&
             !test_bit(R5_LOCKED, &dev->flags) &&
             !(test_bit(R5_UPTODATE, &dev->flags) ||
               test_bit(R5_Wantcompute, &dev->flags))) {
@@ -3507,7 +3552,8 @@ static void handle_stripe_dirtying(struct r5conf *conf,
                 rmw += 2*disks; /* cannot read it */
         }
         /* Would I have to read this buffer for reconstruct_write */
-        if (!test_bit(R5_OVERWRITE, &dev->flags) && i != sh->pd_idx &&
+        if (!test_bit(R5_OVERWRITE, &dev->flags) &&
+            i != sh->pd_idx && i != sh->qd_idx &&
             !test_bit(R5_LOCKED, &dev->flags) &&
             !(test_bit(R5_UPTODATE, &dev->flags) ||
               test_bit(R5_Wantcompute, &dev->flags))) {
@@ -3520,7 +3566,7 @@ static void handle_stripe_dirtying(struct r5conf *conf,
     pr_debug("for sector %llu, rmw=%d rcw=%d\n",
              (unsigned long long)sh->sector, rmw, rcw);
     set_bit(STRIPE_HANDLE, &sh->state);
-    if (rmw < rcw && rmw > 0) {
+    if ((rmw < rcw || (rmw == rcw && conf->rmw_level == PARITY_ENABLE_RMW)) && rmw > 0) {
         /* prefer read-modify-write, but need to get some data */
         if (conf->mddev->queue)
             blk_add_trace_msg(conf->mddev->queue,
@@ -3528,7 +3574,7 @@ static void handle_stripe_dirtying(struct r5conf *conf,
                               (unsigned long long)sh->sector, rmw);
         for (i = disks; i--; ) {
             struct r5dev *dev = &sh->dev[i];
-            if ((dev->towrite || i == sh->pd_idx) &&
+            if ((dev->towrite || i == sh->pd_idx || i == sh->qd_idx) &&
                 !test_bit(R5_LOCKED, &dev->flags) &&
                 !(test_bit(R5_UPTODATE, &dev->flags) ||
                   test_bit(R5_Wantcompute, &dev->flags)) &&
@@ -3547,7 +3593,7 @@ static void handle_stripe_dirtying(struct r5conf *conf,
             }
         }
     }
-    if (rcw <= rmw && rcw > 0) {
+    if ((rcw < rmw || (rcw == rmw && conf->rmw_level != PARITY_ENABLE_RMW)) && rcw > 0) {
         /* want reconstruct write, but need to get some data */
         int qread =0;
         rcw = 0;
@@ -6344,10 +6390,16 @@ static struct r5conf *setup_conf(struct mddev *mddev)
     }
 
     conf->level = mddev->new_level;
-    if (conf->level == 6)
+    if (conf->level == 6) {
         conf->max_degraded = 2;
-    else
+        if (raid6_call.xor_syndrome)
+            conf->rmw_level = PARITY_ENABLE_RMW;
+        else
+            conf->rmw_level = PARITY_DISABLE_RMW;
+    } else {
         conf->max_degraded = 1;
+        conf->rmw_level = PARITY_ENABLE_RMW;
+    }
     conf->algorithm = mddev->new_layout;
     conf->reshape_progress = mddev->reshape_position;
     if (conf->reshape_progress != MaxSector) {
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index ee65ed844d3f..57fef9ba36fa 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -355,6 +355,23 @@ enum {
     STRIPE_OP_RECONSTRUCT,
     STRIPE_OP_CHECK,
 };
+
+/*
+ * RAID parity calculation preferences
+ */
+enum {
+    PARITY_DISABLE_RMW = 0,
+    PARITY_ENABLE_RMW,
+};
+
+/*
+ * Pages requested from set_syndrome_sources()
+ */
+enum {
+    SYNDROME_SRC_ALL,
+    SYNDROME_SRC_WANT_DRAIN,
+    SYNDROME_SRC_WRITTEN,
+};
 /*
  * Plugging:
  *
@@ -411,7 +428,7 @@ struct r5conf {
     spinlock_t hash_locks[NR_STRIPE_HASH_LOCKS];
     struct mddev *mddev;
     int chunk_sectors;
-    int level, algorithm;
+    int level, algorithm, rmw_level;
     int max_degraded;
     int raid_disks;
     int max_nr_stripes;
diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h
index 179b38ffd351..388574ea38ed 100644
--- a/include/linux/async_tx.h
+++ b/include/linux/async_tx.h
@@ -60,12 +60,15 @@ struct dma_chan_ref {
  * dependency chain
  * @ASYNC_TX_FENCE: specify that the next operation in the dependency
  * chain uses this operation's result as an input
+ * @ASYNC_TX_PQ_XOR_DST: do not overwrite the syndrome but XOR it with the
+ * input data. Required for rmw case.
  */
 enum async_tx_flags {
     ASYNC_TX_XOR_ZERO_DST = (1 << 0),
     ASYNC_TX_XOR_DROP_DST = (1 << 1),
     ASYNC_TX_ACK = (1 << 2),
     ASYNC_TX_FENCE = (1 << 3),
+    ASYNC_TX_PQ_XOR_DST = (1 << 4),
 };
 
 /**