aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
author: NeilBrown <neilb@suse.de> 2009-03-30 23:39:38 -0400
committer: NeilBrown <neilb@suse.de> 2009-03-30 23:39:38 -0400
commit: d0dabf7e577411c2bf6b616c751544dc241213d4 (patch)
tree: 26a41b66f1ae83e2127eceace281332134518eb0 /drivers
parent: 112bf8970dbdfc00bd4667da5996e57c2ce58066 (diff)
md/raid6: remove expectation that Q device is immediately after P device.
Code currently assumes that the devices in a raid6 stripe are 0 1 ... N-1 P Q in some rotated order. We will shortly add new layouts in which this strict pattern is broken. So remove this expectation. We still assume that the data disks are roughly in-order. However P and Q can be inserted anywhere within that order. Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/md/raid5.c 211
-rw-r--r-- drivers/md/raid5.h 15
2 files changed, 132 insertions, 94 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index c33073fe7426..cb3e157b52d3 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -133,12 +133,36 @@ static inline void raid5_set_bi_hw_segments(struct bio *bio, unsigned int cnt)
133 bio->bi_phys_segments = raid5_bi_phys_segments(bio) || (cnt << 16); 133 bio->bi_phys_segments = raid5_bi_phys_segments(bio) || (cnt << 16);
134} 134}
135 135
136/* Find first data disk in a raid6 stripe */
137static inline int raid6_d0(struct stripe_head *sh)
138{
139 if (sh->qd_idx == sh->disks - 1)
140 return 0;
141 else
142 return sh->qd_idx + 1;
143}
136static inline int raid6_next_disk(int disk, int raid_disks) 144static inline int raid6_next_disk(int disk, int raid_disks)
137{ 145{
138 disk++; 146 disk++;
139 return (disk < raid_disks) ? disk : 0; 147 return (disk < raid_disks) ? disk : 0;
140} 148}
141 149
150/* When walking through the disks in a raid6 stripe, starting at raid6_d0,
151 * we need to map each disk to a 'slot', where the data disks are slots
152 * 0 .. raid_disks-3, the parity disk is raid_disks-2 and the Q disk
153 * is raid_disks-1. This helper does that mapping.
154 */
155static int raid6_idx_to_slot(int idx, struct stripe_head *sh, int *count)
156{
157 int slot;
158 if (idx == sh->pd_idx)
159 return sh->disks - 2;
160 if (idx == sh->qd_idx)
161 return sh->disks - 1;
162 slot = (*count)++;
163 return slot;
164}
165
142static void return_io(struct bio *return_bi) 166static void return_io(struct bio *return_bi)
143{ 167{
144 struct bio *bi = return_bi; 168 struct bio *bi = return_bi;
@@ -196,6 +220,7 @@ static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh)
196 } 220 }
197 } 221 }
198} 222}
223
199static void release_stripe(struct stripe_head *sh) 224static void release_stripe(struct stripe_head *sh)
200{ 225{
201 raid5_conf_t *conf = sh->raid_conf; 226 raid5_conf_t *conf = sh->raid_conf;
@@ -274,12 +299,14 @@ static int grow_buffers(struct stripe_head *sh, int num)
274} 299}
275 300
276static void raid5_build_block(struct stripe_head *sh, int i); 301static void raid5_build_block(struct stripe_head *sh, int i);
277static int stripe_to_pdidx(sector_t stripe, raid5_conf_t *conf, int previous); 302static int stripe_to_pdidx(sector_t stripe, raid5_conf_t *conf, int previous,
303 int *qd_idx);
278 304
279static void init_stripe(struct stripe_head *sh, sector_t sector, int previous) 305static void init_stripe(struct stripe_head *sh, sector_t sector, int previous)
280{ 306{
281 raid5_conf_t *conf = sh->raid_conf; 307 raid5_conf_t *conf = sh->raid_conf;
282 int i; 308 int i;
309 int qd_idx;
283 310
284 BUG_ON(atomic_read(&sh->count) != 0); 311 BUG_ON(atomic_read(&sh->count) != 0);
285 BUG_ON(test_bit(STRIPE_HANDLE, &sh->state)); 312 BUG_ON(test_bit(STRIPE_HANDLE, &sh->state));
@@ -293,7 +320,8 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int previous)
293 320
294 sh->disks = previous ? conf->previous_raid_disks : conf->raid_disks; 321 sh->disks = previous ? conf->previous_raid_disks : conf->raid_disks;
295 sh->sector = sector; 322 sh->sector = sector;
296 sh->pd_idx = stripe_to_pdidx(sector, conf, previous); 323 sh->pd_idx = stripe_to_pdidx(sector, conf, previous, &qd_idx);
324 sh->qd_idx = qd_idx;
297 sh->state = 0; 325 sh->state = 0;
298 326
299 327
@@ -1235,7 +1263,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
1235 */ 1263 */
1236static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector, 1264static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector,
1237 int previous, 1265 int previous,
1238 int *dd_idx, int *pd_idx) 1266 int *dd_idx, int *pd_idx, int *qd_idx)
1239{ 1267{
1240 long stripe; 1268 long stripe;
1241 unsigned long chunk_number; 1269 unsigned long chunk_number;
@@ -1268,6 +1296,7 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector,
1268 /* 1296 /*
1269 * Select the parity disk based on the user selected algorithm. 1297 * Select the parity disk based on the user selected algorithm.
1270 */ 1298 */
1299 *qd_idx = ~0;
1271 switch(conf->level) { 1300 switch(conf->level) {
1272 case 4: 1301 case 4:
1273 *pd_idx = data_disks; 1302 *pd_idx = data_disks;
@@ -1303,24 +1332,30 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector,
1303 switch (conf->algorithm) { 1332 switch (conf->algorithm) {
1304 case ALGORITHM_LEFT_ASYMMETRIC: 1333 case ALGORITHM_LEFT_ASYMMETRIC:
1305 *pd_idx = raid_disks - 1 - (stripe % raid_disks); 1334 *pd_idx = raid_disks - 1 - (stripe % raid_disks);
1306 if (*pd_idx == raid_disks-1) 1335 *qd_idx = *pd_idx + 1;
1336 if (*pd_idx == raid_disks-1) {
1307 (*dd_idx)++; /* Q D D D P */ 1337 (*dd_idx)++; /* Q D D D P */
1308 else if (*dd_idx >= *pd_idx) 1338 *qd_idx = 0;
1339 } else if (*dd_idx >= *pd_idx)
1309 (*dd_idx) += 2; /* D D P Q D */ 1340 (*dd_idx) += 2; /* D D P Q D */
1310 break; 1341 break;
1311 case ALGORITHM_RIGHT_ASYMMETRIC: 1342 case ALGORITHM_RIGHT_ASYMMETRIC:
1312 *pd_idx = stripe % raid_disks; 1343 *pd_idx = stripe % raid_disks;
1313 if (*pd_idx == raid_disks-1) 1344 *qd_idx = *pd_idx + 1;
1345 if (*pd_idx == raid_disks-1) {
1314 (*dd_idx)++; /* Q D D D P */ 1346 (*dd_idx)++; /* Q D D D P */
1315 else if (*dd_idx >= *pd_idx) 1347 *qd_idx = 0;
1348 } else if (*dd_idx >= *pd_idx)
1316 (*dd_idx) += 2; /* D D P Q D */ 1349 (*dd_idx) += 2; /* D D P Q D */
1317 break; 1350 break;
1318 case ALGORITHM_LEFT_SYMMETRIC: 1351 case ALGORITHM_LEFT_SYMMETRIC:
1319 *pd_idx = raid_disks - 1 - (stripe % raid_disks); 1352 *pd_idx = raid_disks - 1 - (stripe % raid_disks);
1353 *qd_idx = (*pd_idx + 1) % raid_disks;
1320 *dd_idx = (*pd_idx + 2 + *dd_idx) % raid_disks; 1354 *dd_idx = (*pd_idx + 2 + *dd_idx) % raid_disks;
1321 break; 1355 break;
1322 case ALGORITHM_RIGHT_SYMMETRIC: 1356 case ALGORITHM_RIGHT_SYMMETRIC:
1323 *pd_idx = stripe % raid_disks; 1357 *pd_idx = stripe % raid_disks;
1358 *qd_idx = (*pd_idx + 1) % raid_disks;
1324 *dd_idx = (*pd_idx + 2 + *dd_idx) % raid_disks; 1359 *dd_idx = (*pd_idx + 2 + *dd_idx) % raid_disks;
1325 break; 1360 break;
1326 default: 1361 default:
@@ -1347,7 +1382,7 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i)
1347 int sectors_per_chunk = conf->chunk_size >> 9; 1382 int sectors_per_chunk = conf->chunk_size >> 9;
1348 sector_t stripe; 1383 sector_t stripe;
1349 int chunk_offset; 1384 int chunk_offset;
1350 int chunk_number, dummy1, dummy2, dd_idx = i; 1385 int chunk_number, dummy1, dummy2, dummy3, dd_idx = i;
1351 sector_t r_sector; 1386 sector_t r_sector;
1352 1387
1353 1388
@@ -1378,7 +1413,7 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i)
1378 } 1413 }
1379 break; 1414 break;
1380 case 6: 1415 case 6:
1381 if (i == raid6_next_disk(sh->pd_idx, raid_disks)) 1416 if (i == sh->qd_idx)
1382 return 0; /* It is the Q disk */ 1417 return 0; /* It is the Q disk */
1383 switch (conf->algorithm) { 1418 switch (conf->algorithm) {
1384 case ALGORITHM_LEFT_ASYMMETRIC: 1419 case ALGORITHM_LEFT_ASYMMETRIC:
@@ -1411,7 +1446,7 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i)
1411 1446
1412 check = raid5_compute_sector(conf, r_sector, 1447 check = raid5_compute_sector(conf, r_sector,
1413 (raid_disks != conf->raid_disks), 1448 (raid_disks != conf->raid_disks),
1414 &dummy1, &dummy2); 1449 &dummy1, &dummy2, &dummy3);
1415 if (check != sh->sector || dummy1 != dd_idx || dummy2 != sh->pd_idx) { 1450 if (check != sh->sector || dummy1 != dd_idx || dummy2 != sh->pd_idx) {
1416 printk(KERN_ERR "compute_blocknr: map not correct\n"); 1451 printk(KERN_ERR "compute_blocknr: map not correct\n");
1417 return 0; 1452 return 0;
@@ -1480,13 +1515,14 @@ static void copy_data(int frombio, struct bio *bio,
1480static void compute_parity6(struct stripe_head *sh, int method) 1515static void compute_parity6(struct stripe_head *sh, int method)
1481{ 1516{
1482 raid5_conf_t *conf = sh->raid_conf; 1517 raid5_conf_t *conf = sh->raid_conf;
1483 int i, pd_idx = sh->pd_idx, qd_idx, d0_idx, disks = sh->disks, count; 1518 int i, pd_idx, qd_idx, d0_idx, disks = sh->disks, count;
1484 struct bio *chosen; 1519 struct bio *chosen;
1485 /**** FIX THIS: This could be very bad if disks is close to 256 ****/ 1520 /**** FIX THIS: This could be very bad if disks is close to 256 ****/
1486 void *ptrs[disks]; 1521 void *ptrs[disks];
1487 1522
1488 qd_idx = raid6_next_disk(pd_idx, disks); 1523 pd_idx = sh->pd_idx;
1489 d0_idx = raid6_next_disk(qd_idx, disks); 1524 qd_idx = sh->qd_idx;
1525 d0_idx = raid6_d0(sh);
1490 1526
1491 pr_debug("compute_parity, stripe %llu, method %d\n", 1527 pr_debug("compute_parity, stripe %llu, method %d\n",
1492 (unsigned long long)sh->sector, method); 1528 (unsigned long long)sh->sector, method);
@@ -1524,22 +1560,22 @@ static void compute_parity6(struct stripe_head *sh, int method)
1524 set_bit(R5_UPTODATE, &sh->dev[i].flags); 1560 set_bit(R5_UPTODATE, &sh->dev[i].flags);
1525 } 1561 }
1526 1562
1527// switch(method) { 1563 /* Note that unlike RAID-5, the ordering of the disks matters greatly.*/
1528// case RECONSTRUCT_WRITE: 1564 /* FIX: Is this ordering of drives even remotely optimal? */
1529// case CHECK_PARITY: 1565 count = 0;
1530// case UPDATE_PARITY: 1566 i = d0_idx;
1531 /* Note that unlike RAID-5, the ordering of the disks matters greatly. */ 1567 do {
1532 /* FIX: Is this ordering of drives even remotely optimal? */ 1568 int slot = raid6_idx_to_slot(i, sh, &count);
1533 count = 0; 1569 ptrs[slot] = page_address(sh->dev[i].page);
1534 i = d0_idx; 1570 if (slot < sh->disks - 2 &&
1535 do { 1571 !test_bit(R5_UPTODATE, &sh->dev[i].flags)) {
1536 ptrs[count++] = page_address(sh->dev[i].page); 1572 printk(KERN_ERR "block %d/%d not uptodate "
1537 if (count <= disks-2 && !test_bit(R5_UPTODATE, &sh->dev[i].flags)) 1573 "on parity calc\n", i, count);
1538 printk("block %d/%d not uptodate on parity calc\n", i,count); 1574 BUG();
1539 i = raid6_next_disk(i, disks); 1575 }
1540 } while ( i != d0_idx ); 1576 i = raid6_next_disk(i, disks);
1541// break; 1577 } while (i != d0_idx);
1542// } 1578 BUG_ON(count+2 != disks);
1543 1579
1544 raid6_call.gen_syndrome(disks, STRIPE_SIZE, ptrs); 1580 raid6_call.gen_syndrome(disks, STRIPE_SIZE, ptrs);
1545 1581
@@ -1563,8 +1599,7 @@ static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero)
1563{ 1599{
1564 int i, count, disks = sh->disks; 1600 int i, count, disks = sh->disks;
1565 void *ptr[MAX_XOR_BLOCKS], *dest, *p; 1601 void *ptr[MAX_XOR_BLOCKS], *dest, *p;
1566 int pd_idx = sh->pd_idx; 1602 int qd_idx = sh->qd_idx;
1567 int qd_idx = raid6_next_disk(pd_idx, disks);
1568 1603
1569 pr_debug("compute_block_1, stripe %llu, idx %d\n", 1604 pr_debug("compute_block_1, stripe %llu, idx %d\n",
1570 (unsigned long long)sh->sector, dd_idx); 1605 (unsigned long long)sh->sector, dd_idx);
@@ -1600,21 +1635,31 @@ static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero)
1600static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2) 1635static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2)
1601{ 1636{
1602 int i, count, disks = sh->disks; 1637 int i, count, disks = sh->disks;
1603 int pd_idx = sh->pd_idx; 1638 int d0_idx = raid6_d0(sh);
1604 int qd_idx = raid6_next_disk(pd_idx, disks); 1639 int faila = -1, failb = -1;
1605 int d0_idx = raid6_next_disk(qd_idx, disks); 1640 /**** FIX THIS: This could be very bad if disks is close to 256 ****/
1606 int faila, failb; 1641 void *ptrs[disks];
1607 1642
1608 /* faila and failb are disk numbers relative to d0_idx */ 1643 count = 0;
1609 /* pd_idx become disks-2 and qd_idx become disks-1 */ 1644 i = d0_idx;
1610 faila = (dd_idx1 < d0_idx) ? dd_idx1+(disks-d0_idx) : dd_idx1-d0_idx; 1645 do {
1611 failb = (dd_idx2 < d0_idx) ? dd_idx2+(disks-d0_idx) : dd_idx2-d0_idx; 1646 int slot;
1647 slot = raid6_idx_to_slot(i, sh, &count);
1648 ptrs[slot] = page_address(sh->dev[i].page);
1649 if (i == dd_idx1)
1650 faila = slot;
1651 if (i == dd_idx2)
1652 failb = slot;
1653 i = raid6_next_disk(i, disks);
1654 } while (i != d0_idx);
1655 BUG_ON(count+2 != disks);
1612 1656
1613 BUG_ON(faila == failb); 1657 BUG_ON(faila == failb);
1614 if ( failb < faila ) { int tmp = faila; faila = failb; failb = tmp; } 1658 if ( failb < faila ) { int tmp = faila; faila = failb; failb = tmp; }
1615 1659
1616 pr_debug("compute_block_2, stripe %llu, idx %d,%d (%d,%d)\n", 1660 pr_debug("compute_block_2, stripe %llu, idx %d,%d (%d,%d)\n",
1617 (unsigned long long)sh->sector, dd_idx1, dd_idx2, faila, failb); 1661 (unsigned long long)sh->sector, dd_idx1, dd_idx2,
1662 faila, failb);
1618 1663
1619 if ( failb == disks-1 ) { 1664 if ( failb == disks-1 ) {
1620 /* Q disk is one of the missing disks */ 1665 /* Q disk is one of the missing disks */
@@ -1624,39 +1669,26 @@ static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2)
1624 return; 1669 return;
1625 } else { 1670 } else {
1626 /* We're missing D+Q; recompute D from P */ 1671 /* We're missing D+Q; recompute D from P */
1627 compute_block_1(sh, (dd_idx1 == qd_idx) ? dd_idx2 : dd_idx1, 0); 1672 compute_block_1(sh, ((dd_idx1 == sh->qd_idx) ?
1673 dd_idx2 : dd_idx1),
1674 0);
1628 compute_parity6(sh, UPDATE_PARITY); /* Is this necessary? */ 1675 compute_parity6(sh, UPDATE_PARITY); /* Is this necessary? */
1629 return; 1676 return;
1630 } 1677 }
1631 } 1678 }
1632 1679
1633 /* We're missing D+P or D+D; build pointer table */ 1680 /* We're missing D+P or D+D; */
1634 { 1681 if (failb == disks-2) {
1635 /**** FIX THIS: This could be very bad if disks is close to 256 ****/ 1682 /* We're missing D+P. */
1636 void *ptrs[disks]; 1683 raid6_datap_recov(disks, STRIPE_SIZE, faila, ptrs);
1637 1684 } else {
1638 count = 0; 1685 /* We're missing D+D. */
1639 i = d0_idx; 1686 raid6_2data_recov(disks, STRIPE_SIZE, faila, failb, ptrs);
1640 do {
1641 ptrs[count++] = page_address(sh->dev[i].page);
1642 i = raid6_next_disk(i, disks);
1643 if (i != dd_idx1 && i != dd_idx2 &&
1644 !test_bit(R5_UPTODATE, &sh->dev[i].flags))
1645 printk("compute_2 with missing block %d/%d\n", count, i);
1646 } while ( i != d0_idx );
1647
1648 if ( failb == disks-2 ) {
1649 /* We're missing D+P. */
1650 raid6_datap_recov(disks, STRIPE_SIZE, faila, ptrs);
1651 } else {
1652 /* We're missing D+D. */
1653 raid6_2data_recov(disks, STRIPE_SIZE, faila, failb, ptrs);
1654 }
1655
1656 /* Both the above update both missing blocks */
1657 set_bit(R5_UPTODATE, &sh->dev[dd_idx1].flags);
1658 set_bit(R5_UPTODATE, &sh->dev[dd_idx2].flags);
1659 } 1687 }
1688
1689 /* Both the above update both missing blocks */
1690 set_bit(R5_UPTODATE, &sh->dev[dd_idx1].flags);
1691 set_bit(R5_UPTODATE, &sh->dev[dd_idx2].flags);
1660} 1692}
1661 1693
1662static void 1694static void
@@ -1811,7 +1843,8 @@ static int page_is_zero(struct page *p)
1811 memcmp(a, a+4, STRIPE_SIZE-4)==0); 1843 memcmp(a, a+4, STRIPE_SIZE-4)==0);
1812} 1844}
1813 1845
1814static int stripe_to_pdidx(sector_t stripe, raid5_conf_t *conf, int previous) 1846static int stripe_to_pdidx(sector_t stripe, raid5_conf_t *conf, int previous,
1847 int *qd_idxp)
1815{ 1848{
1816 int sectors_per_chunk = conf->chunk_size >> 9; 1849 int sectors_per_chunk = conf->chunk_size >> 9;
1817 int pd_idx, dd_idx; 1850 int pd_idx, dd_idx;
@@ -1822,7 +1855,7 @@ static int stripe_to_pdidx(sector_t stripe, raid5_conf_t *conf, int previous)
1822 stripe * (disks - conf->max_degraded) 1855 stripe * (disks - conf->max_degraded)
1823 *sectors_per_chunk + chunk_offset, 1856 *sectors_per_chunk + chunk_offset,
1824 previous, 1857 previous,
1825 &dd_idx, &pd_idx); 1858 &dd_idx, &pd_idx, qd_idxp);
1826 return pd_idx; 1859 return pd_idx;
1827} 1860}
1828 1861
@@ -2481,12 +2514,13 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
2481 clear_bit(STRIPE_EXPAND_SOURCE, &sh->state); 2514 clear_bit(STRIPE_EXPAND_SOURCE, &sh->state);
2482 for (i = 0; i < sh->disks; i++) 2515 for (i = 0; i < sh->disks; i++)
2483 if (i != sh->pd_idx && (!r6s || i != r6s->qd_idx)) { 2516 if (i != sh->pd_idx && (!r6s || i != r6s->qd_idx)) {
2484 int dd_idx, pd_idx, j; 2517 int dd_idx, pd_idx, qd_idx, j;
2485 struct stripe_head *sh2; 2518 struct stripe_head *sh2;
2486 2519
2487 sector_t bn = compute_blocknr(sh, i); 2520 sector_t bn = compute_blocknr(sh, i);
2488 sector_t s = raid5_compute_sector(conf, bn, 0, 2521 sector_t s =
2489 &dd_idx, &pd_idx); 2522 raid5_compute_sector(conf, bn, 0,
2523 &dd_idx, &pd_idx, &qd_idx);
2490 sh2 = get_active_stripe(conf, s, 0, 1); 2524 sh2 = get_active_stripe(conf, s, 0, 1);
2491 if (sh2 == NULL) 2525 if (sh2 == NULL)
2492 /* so far only the early blocks of this stripe 2526 /* so far only the early blocks of this stripe
@@ -2510,8 +2544,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
2510 set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags); 2544 set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags);
2511 for (j = 0; j < conf->raid_disks; j++) 2545 for (j = 0; j < conf->raid_disks; j++)
2512 if (j != sh2->pd_idx && 2546 if (j != sh2->pd_idx &&
2513 (!r6s || j != raid6_next_disk(sh2->pd_idx, 2547 (!r6s || j != sh2->qd_idx) &&
2514 sh2->disks)) &&
2515 !test_bit(R5_Expanded, &sh2->dev[j].flags)) 2548 !test_bit(R5_Expanded, &sh2->dev[j].flags))
2516 break; 2549 break;
2517 if (j == conf->raid_disks) { 2550 if (j == conf->raid_disks) {
@@ -2771,9 +2804,11 @@ static bool handle_stripe5(struct stripe_head *sh)
2771 2804
2772 if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) && 2805 if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) &&
2773 !sh->reconstruct_state) { 2806 !sh->reconstruct_state) {
2807 int qd_idx;
2774 /* Need to write out all blocks after computing parity */ 2808 /* Need to write out all blocks after computing parity */
2775 sh->disks = conf->raid_disks; 2809 sh->disks = conf->raid_disks;
2776 sh->pd_idx = stripe_to_pdidx(sh->sector, conf, 0); 2810 sh->pd_idx = stripe_to_pdidx(sh->sector, conf, 0, &qd_idx);
2811 sh->qd_idx = qd_idx;
2777 schedule_reconstruction5(sh, &s, 1, 1); 2812 schedule_reconstruction5(sh, &s, 1, 1);
2778 } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) { 2813 } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) {
2779 clear_bit(STRIPE_EXPAND_READY, &sh->state); 2814 clear_bit(STRIPE_EXPAND_READY, &sh->state);
@@ -2814,7 +2849,7 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
2814 struct r5dev *dev, *pdev, *qdev; 2849 struct r5dev *dev, *pdev, *qdev;
2815 mdk_rdev_t *blocked_rdev = NULL; 2850 mdk_rdev_t *blocked_rdev = NULL;
2816 2851
2817 r6s.qd_idx = raid6_next_disk(pd_idx, disks); 2852 r6s.qd_idx = sh->qd_idx;
2818 pr_debug("handling stripe %llu, state=%#lx cnt=%d, " 2853 pr_debug("handling stripe %llu, state=%#lx cnt=%d, "
2819 "pd_idx=%d, qd_idx=%d\n", 2854 "pd_idx=%d, qd_idx=%d\n",
2820 (unsigned long long)sh->sector, sh->state, 2855 (unsigned long long)sh->sector, sh->state,
@@ -2990,8 +3025,10 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
2990 3025
2991 if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) { 3026 if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) {
2992 /* Need to write out all blocks after computing P&Q */ 3027 /* Need to write out all blocks after computing P&Q */
3028 int qd_idx;
2993 sh->disks = conf->raid_disks; 3029 sh->disks = conf->raid_disks;
2994 sh->pd_idx = stripe_to_pdidx(sh->sector, conf, 0); 3030 sh->pd_idx = stripe_to_pdidx(sh->sector, conf, 0, &qd_idx);
3031 sh->qd_idx = qd_idx;
2995 compute_parity6(sh, RECONSTRUCT_WRITE); 3032 compute_parity6(sh, RECONSTRUCT_WRITE);
2996 for (i = conf->raid_disks ; i-- ; ) { 3033 for (i = conf->raid_disks ; i-- ; ) {
2997 set_bit(R5_LOCKED, &sh->dev[i].flags); 3034 set_bit(R5_LOCKED, &sh->dev[i].flags);
@@ -3263,7 +3300,7 @@ static int chunk_aligned_read(struct request_queue *q, struct bio * raid_bio)
3263{ 3300{
3264 mddev_t *mddev = q->queuedata; 3301 mddev_t *mddev = q->queuedata;
3265 raid5_conf_t *conf = mddev_to_conf(mddev); 3302 raid5_conf_t *conf = mddev_to_conf(mddev);
3266 unsigned int dd_idx, pd_idx; 3303 unsigned int dd_idx, pd_idx, qd_idx;
3267 struct bio* align_bi; 3304 struct bio* align_bi;
3268 mdk_rdev_t *rdev; 3305 mdk_rdev_t *rdev;
3269 3306
@@ -3288,7 +3325,7 @@ static int chunk_aligned_read(struct request_queue *q, struct bio * raid_bio)
3288 */ 3325 */
3289 align_bi->bi_sector = raid5_compute_sector(conf, raid_bio->bi_sector, 3326 align_bi->bi_sector = raid5_compute_sector(conf, raid_bio->bi_sector,
3290 0, 3327 0,
3291 &dd_idx, &pd_idx); 3328 &dd_idx, &pd_idx, &qd_idx);
3292 3329
3293 rcu_read_lock(); 3330 rcu_read_lock();
3294 rdev = rcu_dereference(conf->disks[dd_idx].rdev); 3331 rdev = rcu_dereference(conf->disks[dd_idx].rdev);
@@ -3380,7 +3417,7 @@ static int make_request(struct request_queue *q, struct bio * bi)
3380{ 3417{
3381 mddev_t *mddev = q->queuedata; 3418 mddev_t *mddev = q->queuedata;
3382 raid5_conf_t *conf = mddev_to_conf(mddev); 3419 raid5_conf_t *conf = mddev_to_conf(mddev);
3383 unsigned int dd_idx, pd_idx; 3420 int dd_idx, pd_idx, qd_idx;
3384 sector_t new_sector; 3421 sector_t new_sector;
3385 sector_t logical_sector, last_sector; 3422 sector_t logical_sector, last_sector;
3386 struct stripe_head *sh; 3423 struct stripe_head *sh;
@@ -3447,7 +3484,7 @@ static int make_request(struct request_queue *q, struct bio * bi)
3447 3484
3448 new_sector = raid5_compute_sector(conf, logical_sector, 3485 new_sector = raid5_compute_sector(conf, logical_sector,
3449 previous, 3486 previous,
3450 &dd_idx, &pd_idx); 3487 &dd_idx, &pd_idx, &qd_idx);
3451 pr_debug("raid5: make_request, sector %llu logical %llu\n", 3488 pr_debug("raid5: make_request, sector %llu logical %llu\n",
3452 (unsigned long long)new_sector, 3489 (unsigned long long)new_sector,
3453 (unsigned long long)logical_sector); 3490 (unsigned long long)logical_sector);
@@ -3535,7 +3572,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
3535 */ 3572 */
3536 raid5_conf_t *conf = (raid5_conf_t *) mddev->private; 3573 raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
3537 struct stripe_head *sh; 3574 struct stripe_head *sh;
3538 int pd_idx; 3575 int pd_idx, qd_idx;
3539 sector_t first_sector, last_sector; 3576 sector_t first_sector, last_sector;
3540 int raid_disks = conf->previous_raid_disks; 3577 int raid_disks = conf->previous_raid_disks;
3541 int data_disks = raid_disks - conf->max_degraded; 3578 int data_disks = raid_disks - conf->max_degraded;
@@ -3598,7 +3635,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
3598 if (j == sh->pd_idx) 3635 if (j == sh->pd_idx)
3599 continue; 3636 continue;
3600 if (conf->level == 6 && 3637 if (conf->level == 6 &&
3601 j == raid6_next_disk(sh->pd_idx, sh->disks)) 3638 j == sh->qd_idx)
3602 continue; 3639 continue;
3603 s = compute_blocknr(sh, j); 3640 s = compute_blocknr(sh, j);
3604 if (s < mddev->array_sectors) { 3641 if (s < mddev->array_sectors) {
@@ -3625,11 +3662,11 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
3625 */ 3662 */
3626 first_sector = 3663 first_sector =
3627 raid5_compute_sector(conf, sector_nr*(new_data_disks), 3664 raid5_compute_sector(conf, sector_nr*(new_data_disks),
3628 1, &dd_idx, &pd_idx); 3665 1, &dd_idx, &pd_idx, &qd_idx);
3629 last_sector = 3666 last_sector =
3630 raid5_compute_sector(conf, ((sector_nr+conf->chunk_size/512) 3667 raid5_compute_sector(conf, ((sector_nr+conf->chunk_size/512)
3631 *(new_data_disks) - 1), 3668 *(new_data_disks) - 1),
3632 1, &dd_idx, &pd_idx); 3669 1, &dd_idx, &pd_idx, &qd_idx);
3633 if (last_sector >= mddev->dev_sectors) 3670 if (last_sector >= mddev->dev_sectors)
3634 last_sector = mddev->dev_sectors - 1; 3671 last_sector = mddev->dev_sectors - 1;
3635 while (first_sector <= last_sector) { 3672 while (first_sector <= last_sector) {
@@ -3764,7 +3801,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
3764 * it will be only one 'dd_idx' and only need one call to raid5_compute_sector. 3801 * it will be only one 'dd_idx' and only need one call to raid5_compute_sector.
3765 */ 3802 */
3766 struct stripe_head *sh; 3803 struct stripe_head *sh;
3767 int dd_idx, pd_idx; 3804 int dd_idx, pd_idx, qd_idx;
3768 sector_t sector, logical_sector, last_sector; 3805 sector_t sector, logical_sector, last_sector;
3769 int scnt = 0; 3806 int scnt = 0;
3770 int remaining; 3807 int remaining;
@@ -3772,7 +3809,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
3772 3809
3773 logical_sector = raid_bio->bi_sector & ~((sector_t)STRIPE_SECTORS-1); 3810 logical_sector = raid_bio->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
3774 sector = raid5_compute_sector(conf, logical_sector, 3811 sector = raid5_compute_sector(conf, logical_sector,
3775 0, &dd_idx, &pd_idx); 3812 0, &dd_idx, &pd_idx, &qd_idx);
3776 last_sector = raid_bio->bi_sector + (raid_bio->bi_size>>9); 3813 last_sector = raid_bio->bi_sector + (raid_bio->bi_size>>9);
3777 3814
3778 for (; logical_sector < last_sector; 3815 for (; logical_sector < last_sector;
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 0ed22dff56e0..0c7375ad12bd 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -196,15 +196,16 @@ enum reconstruct_states {
196 196
197struct stripe_head { 197struct stripe_head {
198 struct hlist_node hash; 198 struct hlist_node hash;
199 struct list_head lru; /* inactive_list or handle_list */ 199 struct list_head lru; /* inactive_list or handle_list */
200 struct raid5_private_data *raid_conf; 200 struct raid5_private_data *raid_conf;
201 sector_t sector; /* sector of this row */ 201 sector_t sector; /* sector of this row */
202 int pd_idx; /* parity disk index */ 202 short pd_idx; /* parity disk index */
203 unsigned long state; /* state flags */ 203 short qd_idx; /* 'Q' disk index for raid6 */
204 atomic_t count; /* nr of active thread/requests */ 204 unsigned long state; /* state flags */
205 atomic_t count; /* nr of active thread/requests */
205 spinlock_t lock; 206 spinlock_t lock;
206 int bm_seq; /* sequence number for bitmap flushes */ 207 int bm_seq; /* sequence number for bitmap flushes */
207 int disks; /* disks in stripe */ 208 int disks; /* disks in stripe */
208 enum check_states check_state; 209 enum check_states check_state;
209 enum reconstruct_states reconstruct_state; 210 enum reconstruct_states reconstruct_state;
210 /* stripe_operations 211 /* stripe_operations