diff options
author | NeilBrown <neilb@suse.de> | 2009-03-30 23:39:38 -0400 |
---|---|---|
committer | NeilBrown <neilb@suse.de> | 2009-03-30 23:39:38 -0400 |
commit | d0dabf7e577411c2bf6b616c751544dc241213d4 (patch) | |
tree | 26a41b66f1ae83e2127eceace281332134518eb0 | |
parent | 112bf8970dbdfc00bd4667da5996e57c2ce58066 (diff) |
md/raid6: remove expectation that Q device is immediately after P device.
Code currently assumes that the devices in a raid6 stripe are
0 1 ... N-1 P Q
in some rotated order. We will shortly add new layouts in which
this strict pattern is broken.
So remove this expectation. We still assume that the data disks
are roughly in-order. However P and Q can be inserted anywhere within
that order.
Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r-- | drivers/md/raid5.c | 211 | ||||
-rw-r--r-- | drivers/md/raid5.h | 15 |
2 files changed, 132 insertions, 94 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index c33073fe7426..cb3e157b52d3 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -133,12 +133,36 @@ static inline void raid5_set_bi_hw_segments(struct bio *bio, unsigned int cnt) | |||
133 | bio->bi_phys_segments = raid5_bi_phys_segments(bio) || (cnt << 16); | 133 | bio->bi_phys_segments = raid5_bi_phys_segments(bio) || (cnt << 16); |
134 | } | 134 | } |
135 | 135 | ||
136 | /* Find first data disk in a raid6 stripe */ | ||
137 | static inline int raid6_d0(struct stripe_head *sh) | ||
138 | { | ||
139 | if (sh->qd_idx == sh->disks - 1) | ||
140 | return 0; | ||
141 | else | ||
142 | return sh->qd_idx + 1; | ||
143 | } | ||
136 | static inline int raid6_next_disk(int disk, int raid_disks) | 144 | static inline int raid6_next_disk(int disk, int raid_disks) |
137 | { | 145 | { |
138 | disk++; | 146 | disk++; |
139 | return (disk < raid_disks) ? disk : 0; | 147 | return (disk < raid_disks) ? disk : 0; |
140 | } | 148 | } |
141 | 149 | ||
150 | /* When walking through the disks in a raid6 stripe, starting at raid6_d0, | ||
151 | * we need to map each disk to a 'slot', where the data disks are slots | ||
152 | * 0 .. raid_disks-3, the parity disk is raid_disks-2 and the Q disk | ||
153 | * is raid_disks-1. This helper does that mapping. | ||
154 | */ | ||
155 | static int raid6_idx_to_slot(int idx, struct stripe_head *sh, int *count) | ||
156 | { | ||
157 | int slot; | ||
158 | if (idx == sh->pd_idx) | ||
159 | return sh->disks - 2; | ||
160 | if (idx == sh->qd_idx) | ||
161 | return sh->disks - 1; | ||
162 | slot = (*count)++; | ||
163 | return slot; | ||
164 | } | ||
165 | |||
142 | static void return_io(struct bio *return_bi) | 166 | static void return_io(struct bio *return_bi) |
143 | { | 167 | { |
144 | struct bio *bi = return_bi; | 168 | struct bio *bi = return_bi; |
@@ -196,6 +220,7 @@ static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh) | |||
196 | } | 220 | } |
197 | } | 221 | } |
198 | } | 222 | } |
223 | |||
199 | static void release_stripe(struct stripe_head *sh) | 224 | static void release_stripe(struct stripe_head *sh) |
200 | { | 225 | { |
201 | raid5_conf_t *conf = sh->raid_conf; | 226 | raid5_conf_t *conf = sh->raid_conf; |
@@ -274,12 +299,14 @@ static int grow_buffers(struct stripe_head *sh, int num) | |||
274 | } | 299 | } |
275 | 300 | ||
276 | static void raid5_build_block(struct stripe_head *sh, int i); | 301 | static void raid5_build_block(struct stripe_head *sh, int i); |
277 | static int stripe_to_pdidx(sector_t stripe, raid5_conf_t *conf, int previous); | 302 | static int stripe_to_pdidx(sector_t stripe, raid5_conf_t *conf, int previous, |
303 | int *qd_idx); | ||
278 | 304 | ||
279 | static void init_stripe(struct stripe_head *sh, sector_t sector, int previous) | 305 | static void init_stripe(struct stripe_head *sh, sector_t sector, int previous) |
280 | { | 306 | { |
281 | raid5_conf_t *conf = sh->raid_conf; | 307 | raid5_conf_t *conf = sh->raid_conf; |
282 | int i; | 308 | int i; |
309 | int qd_idx; | ||
283 | 310 | ||
284 | BUG_ON(atomic_read(&sh->count) != 0); | 311 | BUG_ON(atomic_read(&sh->count) != 0); |
285 | BUG_ON(test_bit(STRIPE_HANDLE, &sh->state)); | 312 | BUG_ON(test_bit(STRIPE_HANDLE, &sh->state)); |
@@ -293,7 +320,8 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int previous) | |||
293 | 320 | ||
294 | sh->disks = previous ? conf->previous_raid_disks : conf->raid_disks; | 321 | sh->disks = previous ? conf->previous_raid_disks : conf->raid_disks; |
295 | sh->sector = sector; | 322 | sh->sector = sector; |
296 | sh->pd_idx = stripe_to_pdidx(sector, conf, previous); | 323 | sh->pd_idx = stripe_to_pdidx(sector, conf, previous, &qd_idx); |
324 | sh->qd_idx = qd_idx; | ||
297 | sh->state = 0; | 325 | sh->state = 0; |
298 | 326 | ||
299 | 327 | ||
@@ -1235,7 +1263,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1235 | */ | 1263 | */ |
1236 | static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector, | 1264 | static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector, |
1237 | int previous, | 1265 | int previous, |
1238 | int *dd_idx, int *pd_idx) | 1266 | int *dd_idx, int *pd_idx, int *qd_idx) |
1239 | { | 1267 | { |
1240 | long stripe; | 1268 | long stripe; |
1241 | unsigned long chunk_number; | 1269 | unsigned long chunk_number; |
@@ -1268,6 +1296,7 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector, | |||
1268 | /* | 1296 | /* |
1269 | * Select the parity disk based on the user selected algorithm. | 1297 | * Select the parity disk based on the user selected algorithm. |
1270 | */ | 1298 | */ |
1299 | *qd_idx = ~0; | ||
1271 | switch(conf->level) { | 1300 | switch(conf->level) { |
1272 | case 4: | 1301 | case 4: |
1273 | *pd_idx = data_disks; | 1302 | *pd_idx = data_disks; |
@@ -1303,24 +1332,30 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector, | |||
1303 | switch (conf->algorithm) { | 1332 | switch (conf->algorithm) { |
1304 | case ALGORITHM_LEFT_ASYMMETRIC: | 1333 | case ALGORITHM_LEFT_ASYMMETRIC: |
1305 | *pd_idx = raid_disks - 1 - (stripe % raid_disks); | 1334 | *pd_idx = raid_disks - 1 - (stripe % raid_disks); |
1306 | if (*pd_idx == raid_disks-1) | 1335 | *qd_idx = *pd_idx + 1; |
1336 | if (*pd_idx == raid_disks-1) { | ||
1307 | (*dd_idx)++; /* Q D D D P */ | 1337 | (*dd_idx)++; /* Q D D D P */ |
1308 | else if (*dd_idx >= *pd_idx) | 1338 | *qd_idx = 0; |
1339 | } else if (*dd_idx >= *pd_idx) | ||
1309 | (*dd_idx) += 2; /* D D P Q D */ | 1340 | (*dd_idx) += 2; /* D D P Q D */ |
1310 | break; | 1341 | break; |
1311 | case ALGORITHM_RIGHT_ASYMMETRIC: | 1342 | case ALGORITHM_RIGHT_ASYMMETRIC: |
1312 | *pd_idx = stripe % raid_disks; | 1343 | *pd_idx = stripe % raid_disks; |
1313 | if (*pd_idx == raid_disks-1) | 1344 | *qd_idx = *pd_idx + 1; |
1345 | if (*pd_idx == raid_disks-1) { | ||
1314 | (*dd_idx)++; /* Q D D D P */ | 1346 | (*dd_idx)++; /* Q D D D P */ |
1315 | else if (*dd_idx >= *pd_idx) | 1347 | *qd_idx = 0; |
1348 | } else if (*dd_idx >= *pd_idx) | ||
1316 | (*dd_idx) += 2; /* D D P Q D */ | 1349 | (*dd_idx) += 2; /* D D P Q D */ |
1317 | break; | 1350 | break; |
1318 | case ALGORITHM_LEFT_SYMMETRIC: | 1351 | case ALGORITHM_LEFT_SYMMETRIC: |
1319 | *pd_idx = raid_disks - 1 - (stripe % raid_disks); | 1352 | *pd_idx = raid_disks - 1 - (stripe % raid_disks); |
1353 | *qd_idx = (*pd_idx + 1) % raid_disks; | ||
1320 | *dd_idx = (*pd_idx + 2 + *dd_idx) % raid_disks; | 1354 | *dd_idx = (*pd_idx + 2 + *dd_idx) % raid_disks; |
1321 | break; | 1355 | break; |
1322 | case ALGORITHM_RIGHT_SYMMETRIC: | 1356 | case ALGORITHM_RIGHT_SYMMETRIC: |
1323 | *pd_idx = stripe % raid_disks; | 1357 | *pd_idx = stripe % raid_disks; |
1358 | *qd_idx = (*pd_idx + 1) % raid_disks; | ||
1324 | *dd_idx = (*pd_idx + 2 + *dd_idx) % raid_disks; | 1359 | *dd_idx = (*pd_idx + 2 + *dd_idx) % raid_disks; |
1325 | break; | 1360 | break; |
1326 | default: | 1361 | default: |
@@ -1347,7 +1382,7 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i) | |||
1347 | int sectors_per_chunk = conf->chunk_size >> 9; | 1382 | int sectors_per_chunk = conf->chunk_size >> 9; |
1348 | sector_t stripe; | 1383 | sector_t stripe; |
1349 | int chunk_offset; | 1384 | int chunk_offset; |
1350 | int chunk_number, dummy1, dummy2, dd_idx = i; | 1385 | int chunk_number, dummy1, dummy2, dummy3, dd_idx = i; |
1351 | sector_t r_sector; | 1386 | sector_t r_sector; |
1352 | 1387 | ||
1353 | 1388 | ||
@@ -1378,7 +1413,7 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i) | |||
1378 | } | 1413 | } |
1379 | break; | 1414 | break; |
1380 | case 6: | 1415 | case 6: |
1381 | if (i == raid6_next_disk(sh->pd_idx, raid_disks)) | 1416 | if (i == sh->qd_idx) |
1382 | return 0; /* It is the Q disk */ | 1417 | return 0; /* It is the Q disk */ |
1383 | switch (conf->algorithm) { | 1418 | switch (conf->algorithm) { |
1384 | case ALGORITHM_LEFT_ASYMMETRIC: | 1419 | case ALGORITHM_LEFT_ASYMMETRIC: |
@@ -1411,7 +1446,7 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i) | |||
1411 | 1446 | ||
1412 | check = raid5_compute_sector(conf, r_sector, | 1447 | check = raid5_compute_sector(conf, r_sector, |
1413 | (raid_disks != conf->raid_disks), | 1448 | (raid_disks != conf->raid_disks), |
1414 | &dummy1, &dummy2); | 1449 | &dummy1, &dummy2, &dummy3); |
1415 | if (check != sh->sector || dummy1 != dd_idx || dummy2 != sh->pd_idx) { | 1450 | if (check != sh->sector || dummy1 != dd_idx || dummy2 != sh->pd_idx) { |
1416 | printk(KERN_ERR "compute_blocknr: map not correct\n"); | 1451 | printk(KERN_ERR "compute_blocknr: map not correct\n"); |
1417 | return 0; | 1452 | return 0; |
@@ -1480,13 +1515,14 @@ static void copy_data(int frombio, struct bio *bio, | |||
1480 | static void compute_parity6(struct stripe_head *sh, int method) | 1515 | static void compute_parity6(struct stripe_head *sh, int method) |
1481 | { | 1516 | { |
1482 | raid5_conf_t *conf = sh->raid_conf; | 1517 | raid5_conf_t *conf = sh->raid_conf; |
1483 | int i, pd_idx = sh->pd_idx, qd_idx, d0_idx, disks = sh->disks, count; | 1518 | int i, pd_idx, qd_idx, d0_idx, disks = sh->disks, count; |
1484 | struct bio *chosen; | 1519 | struct bio *chosen; |
1485 | /**** FIX THIS: This could be very bad if disks is close to 256 ****/ | 1520 | /**** FIX THIS: This could be very bad if disks is close to 256 ****/ |
1486 | void *ptrs[disks]; | 1521 | void *ptrs[disks]; |
1487 | 1522 | ||
1488 | qd_idx = raid6_next_disk(pd_idx, disks); | 1523 | pd_idx = sh->pd_idx; |
1489 | d0_idx = raid6_next_disk(qd_idx, disks); | 1524 | qd_idx = sh->qd_idx; |
1525 | d0_idx = raid6_d0(sh); | ||
1490 | 1526 | ||
1491 | pr_debug("compute_parity, stripe %llu, method %d\n", | 1527 | pr_debug("compute_parity, stripe %llu, method %d\n", |
1492 | (unsigned long long)sh->sector, method); | 1528 | (unsigned long long)sh->sector, method); |
@@ -1524,22 +1560,22 @@ static void compute_parity6(struct stripe_head *sh, int method) | |||
1524 | set_bit(R5_UPTODATE, &sh->dev[i].flags); | 1560 | set_bit(R5_UPTODATE, &sh->dev[i].flags); |
1525 | } | 1561 | } |
1526 | 1562 | ||
1527 | // switch(method) { | 1563 | /* Note that unlike RAID-5, the ordering of the disks matters greatly.*/ |
1528 | // case RECONSTRUCT_WRITE: | 1564 | /* FIX: Is this ordering of drives even remotely optimal? */ |
1529 | // case CHECK_PARITY: | 1565 | count = 0; |
1530 | // case UPDATE_PARITY: | 1566 | i = d0_idx; |
1531 | /* Note that unlike RAID-5, the ordering of the disks matters greatly. */ | 1567 | do { |
1532 | /* FIX: Is this ordering of drives even remotely optimal? */ | 1568 | int slot = raid6_idx_to_slot(i, sh, &count); |
1533 | count = 0; | 1569 | ptrs[slot] = page_address(sh->dev[i].page); |
1534 | i = d0_idx; | 1570 | if (slot < sh->disks - 2 && |
1535 | do { | 1571 | !test_bit(R5_UPTODATE, &sh->dev[i].flags)) { |
1536 | ptrs[count++] = page_address(sh->dev[i].page); | 1572 | printk(KERN_ERR "block %d/%d not uptodate " |
1537 | if (count <= disks-2 && !test_bit(R5_UPTODATE, &sh->dev[i].flags)) | 1573 | "on parity calc\n", i, count); |
1538 | printk("block %d/%d not uptodate on parity calc\n", i,count); | 1574 | BUG(); |
1539 | i = raid6_next_disk(i, disks); | 1575 | } |
1540 | } while ( i != d0_idx ); | 1576 | i = raid6_next_disk(i, disks); |
1541 | // break; | 1577 | } while (i != d0_idx); |
1542 | // } | 1578 | BUG_ON(count+2 != disks); |
1543 | 1579 | ||
1544 | raid6_call.gen_syndrome(disks, STRIPE_SIZE, ptrs); | 1580 | raid6_call.gen_syndrome(disks, STRIPE_SIZE, ptrs); |
1545 | 1581 | ||
@@ -1563,8 +1599,7 @@ static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero) | |||
1563 | { | 1599 | { |
1564 | int i, count, disks = sh->disks; | 1600 | int i, count, disks = sh->disks; |
1565 | void *ptr[MAX_XOR_BLOCKS], *dest, *p; | 1601 | void *ptr[MAX_XOR_BLOCKS], *dest, *p; |
1566 | int pd_idx = sh->pd_idx; | 1602 | int qd_idx = sh->qd_idx; |
1567 | int qd_idx = raid6_next_disk(pd_idx, disks); | ||
1568 | 1603 | ||
1569 | pr_debug("compute_block_1, stripe %llu, idx %d\n", | 1604 | pr_debug("compute_block_1, stripe %llu, idx %d\n", |
1570 | (unsigned long long)sh->sector, dd_idx); | 1605 | (unsigned long long)sh->sector, dd_idx); |
@@ -1600,21 +1635,31 @@ static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero) | |||
1600 | static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2) | 1635 | static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2) |
1601 | { | 1636 | { |
1602 | int i, count, disks = sh->disks; | 1637 | int i, count, disks = sh->disks; |
1603 | int pd_idx = sh->pd_idx; | 1638 | int d0_idx = raid6_d0(sh); |
1604 | int qd_idx = raid6_next_disk(pd_idx, disks); | 1639 | int faila = -1, failb = -1; |
1605 | int d0_idx = raid6_next_disk(qd_idx, disks); | 1640 | /**** FIX THIS: This could be very bad if disks is close to 256 ****/ |
1606 | int faila, failb; | 1641 | void *ptrs[disks]; |
1607 | 1642 | ||
1608 | /* faila and failb are disk numbers relative to d0_idx */ | 1643 | count = 0; |
1609 | /* pd_idx become disks-2 and qd_idx become disks-1 */ | 1644 | i = d0_idx; |
1610 | faila = (dd_idx1 < d0_idx) ? dd_idx1+(disks-d0_idx) : dd_idx1-d0_idx; | 1645 | do { |
1611 | failb = (dd_idx2 < d0_idx) ? dd_idx2+(disks-d0_idx) : dd_idx2-d0_idx; | 1646 | int slot; |
1647 | slot = raid6_idx_to_slot(i, sh, &count); | ||
1648 | ptrs[slot] = page_address(sh->dev[i].page); | ||
1649 | if (i == dd_idx1) | ||
1650 | faila = slot; | ||
1651 | if (i == dd_idx2) | ||
1652 | failb = slot; | ||
1653 | i = raid6_next_disk(i, disks); | ||
1654 | } while (i != d0_idx); | ||
1655 | BUG_ON(count+2 != disks); | ||
1612 | 1656 | ||
1613 | BUG_ON(faila == failb); | 1657 | BUG_ON(faila == failb); |
1614 | if ( failb < faila ) { int tmp = faila; faila = failb; failb = tmp; } | 1658 | if ( failb < faila ) { int tmp = faila; faila = failb; failb = tmp; } |
1615 | 1659 | ||
1616 | pr_debug("compute_block_2, stripe %llu, idx %d,%d (%d,%d)\n", | 1660 | pr_debug("compute_block_2, stripe %llu, idx %d,%d (%d,%d)\n", |
1617 | (unsigned long long)sh->sector, dd_idx1, dd_idx2, faila, failb); | 1661 | (unsigned long long)sh->sector, dd_idx1, dd_idx2, |
1662 | faila, failb); | ||
1618 | 1663 | ||
1619 | if ( failb == disks-1 ) { | 1664 | if ( failb == disks-1 ) { |
1620 | /* Q disk is one of the missing disks */ | 1665 | /* Q disk is one of the missing disks */ |
@@ -1624,39 +1669,26 @@ static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2) | |||
1624 | return; | 1669 | return; |
1625 | } else { | 1670 | } else { |
1626 | /* We're missing D+Q; recompute D from P */ | 1671 | /* We're missing D+Q; recompute D from P */ |
1627 | compute_block_1(sh, (dd_idx1 == qd_idx) ? dd_idx2 : dd_idx1, 0); | 1672 | compute_block_1(sh, ((dd_idx1 == sh->qd_idx) ? |
1673 | dd_idx2 : dd_idx1), | ||
1674 | 0); | ||
1628 | compute_parity6(sh, UPDATE_PARITY); /* Is this necessary? */ | 1675 | compute_parity6(sh, UPDATE_PARITY); /* Is this necessary? */ |
1629 | return; | 1676 | return; |
1630 | } | 1677 | } |
1631 | } | 1678 | } |
1632 | 1679 | ||
1633 | /* We're missing D+P or D+D; build pointer table */ | 1680 | /* We're missing D+P or D+D; */ |
1634 | { | 1681 | if (failb == disks-2) { |
1635 | /**** FIX THIS: This could be very bad if disks is close to 256 ****/ | 1682 | /* We're missing D+P. */ |
1636 | void *ptrs[disks]; | 1683 | raid6_datap_recov(disks, STRIPE_SIZE, faila, ptrs); |
1637 | 1684 | } else { | |
1638 | count = 0; | 1685 | /* We're missing D+D. */ |
1639 | i = d0_idx; | 1686 | raid6_2data_recov(disks, STRIPE_SIZE, faila, failb, ptrs); |
1640 | do { | ||
1641 | ptrs[count++] = page_address(sh->dev[i].page); | ||
1642 | i = raid6_next_disk(i, disks); | ||
1643 | if (i != dd_idx1 && i != dd_idx2 && | ||
1644 | !test_bit(R5_UPTODATE, &sh->dev[i].flags)) | ||
1645 | printk("compute_2 with missing block %d/%d\n", count, i); | ||
1646 | } while ( i != d0_idx ); | ||
1647 | |||
1648 | if ( failb == disks-2 ) { | ||
1649 | /* We're missing D+P. */ | ||
1650 | raid6_datap_recov(disks, STRIPE_SIZE, faila, ptrs); | ||
1651 | } else { | ||
1652 | /* We're missing D+D. */ | ||
1653 | raid6_2data_recov(disks, STRIPE_SIZE, faila, failb, ptrs); | ||
1654 | } | ||
1655 | |||
1656 | /* Both the above update both missing blocks */ | ||
1657 | set_bit(R5_UPTODATE, &sh->dev[dd_idx1].flags); | ||
1658 | set_bit(R5_UPTODATE, &sh->dev[dd_idx2].flags); | ||
1659 | } | 1687 | } |
1688 | |||
1689 | /* Both the above update both missing blocks */ | ||
1690 | set_bit(R5_UPTODATE, &sh->dev[dd_idx1].flags); | ||
1691 | set_bit(R5_UPTODATE, &sh->dev[dd_idx2].flags); | ||
1660 | } | 1692 | } |
1661 | 1693 | ||
1662 | static void | 1694 | static void |
@@ -1811,7 +1843,8 @@ static int page_is_zero(struct page *p) | |||
1811 | memcmp(a, a+4, STRIPE_SIZE-4)==0); | 1843 | memcmp(a, a+4, STRIPE_SIZE-4)==0); |
1812 | } | 1844 | } |
1813 | 1845 | ||
1814 | static int stripe_to_pdidx(sector_t stripe, raid5_conf_t *conf, int previous) | 1846 | static int stripe_to_pdidx(sector_t stripe, raid5_conf_t *conf, int previous, |
1847 | int *qd_idxp) | ||
1815 | { | 1848 | { |
1816 | int sectors_per_chunk = conf->chunk_size >> 9; | 1849 | int sectors_per_chunk = conf->chunk_size >> 9; |
1817 | int pd_idx, dd_idx; | 1850 | int pd_idx, dd_idx; |
@@ -1822,7 +1855,7 @@ static int stripe_to_pdidx(sector_t stripe, raid5_conf_t *conf, int previous) | |||
1822 | stripe * (disks - conf->max_degraded) | 1855 | stripe * (disks - conf->max_degraded) |
1823 | *sectors_per_chunk + chunk_offset, | 1856 | *sectors_per_chunk + chunk_offset, |
1824 | previous, | 1857 | previous, |
1825 | &dd_idx, &pd_idx); | 1858 | &dd_idx, &pd_idx, qd_idxp); |
1826 | return pd_idx; | 1859 | return pd_idx; |
1827 | } | 1860 | } |
1828 | 1861 | ||
@@ -2481,12 +2514,13 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, | |||
2481 | clear_bit(STRIPE_EXPAND_SOURCE, &sh->state); | 2514 | clear_bit(STRIPE_EXPAND_SOURCE, &sh->state); |
2482 | for (i = 0; i < sh->disks; i++) | 2515 | for (i = 0; i < sh->disks; i++) |
2483 | if (i != sh->pd_idx && (!r6s || i != r6s->qd_idx)) { | 2516 | if (i != sh->pd_idx && (!r6s || i != r6s->qd_idx)) { |
2484 | int dd_idx, pd_idx, j; | 2517 | int dd_idx, pd_idx, qd_idx, j; |
2485 | struct stripe_head *sh2; | 2518 | struct stripe_head *sh2; |
2486 | 2519 | ||
2487 | sector_t bn = compute_blocknr(sh, i); | 2520 | sector_t bn = compute_blocknr(sh, i); |
2488 | sector_t s = raid5_compute_sector(conf, bn, 0, | 2521 | sector_t s = |
2489 | &dd_idx, &pd_idx); | 2522 | raid5_compute_sector(conf, bn, 0, |
2523 | &dd_idx, &pd_idx, &qd_idx); | ||
2490 | sh2 = get_active_stripe(conf, s, 0, 1); | 2524 | sh2 = get_active_stripe(conf, s, 0, 1); |
2491 | if (sh2 == NULL) | 2525 | if (sh2 == NULL) |
2492 | /* so far only the early blocks of this stripe | 2526 | /* so far only the early blocks of this stripe |
@@ -2510,8 +2544,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, | |||
2510 | set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags); | 2544 | set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags); |
2511 | for (j = 0; j < conf->raid_disks; j++) | 2545 | for (j = 0; j < conf->raid_disks; j++) |
2512 | if (j != sh2->pd_idx && | 2546 | if (j != sh2->pd_idx && |
2513 | (!r6s || j != raid6_next_disk(sh2->pd_idx, | 2547 | (!r6s || j != sh2->qd_idx) && |
2514 | sh2->disks)) && | ||
2515 | !test_bit(R5_Expanded, &sh2->dev[j].flags)) | 2548 | !test_bit(R5_Expanded, &sh2->dev[j].flags)) |
2516 | break; | 2549 | break; |
2517 | if (j == conf->raid_disks) { | 2550 | if (j == conf->raid_disks) { |
@@ -2771,9 +2804,11 @@ static bool handle_stripe5(struct stripe_head *sh) | |||
2771 | 2804 | ||
2772 | if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) && | 2805 | if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) && |
2773 | !sh->reconstruct_state) { | 2806 | !sh->reconstruct_state) { |
2807 | int qd_idx; | ||
2774 | /* Need to write out all blocks after computing parity */ | 2808 | /* Need to write out all blocks after computing parity */ |
2775 | sh->disks = conf->raid_disks; | 2809 | sh->disks = conf->raid_disks; |
2776 | sh->pd_idx = stripe_to_pdidx(sh->sector, conf, 0); | 2810 | sh->pd_idx = stripe_to_pdidx(sh->sector, conf, 0, &qd_idx); |
2811 | sh->qd_idx = qd_idx; | ||
2777 | schedule_reconstruction5(sh, &s, 1, 1); | 2812 | schedule_reconstruction5(sh, &s, 1, 1); |
2778 | } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) { | 2813 | } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) { |
2779 | clear_bit(STRIPE_EXPAND_READY, &sh->state); | 2814 | clear_bit(STRIPE_EXPAND_READY, &sh->state); |
@@ -2814,7 +2849,7 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) | |||
2814 | struct r5dev *dev, *pdev, *qdev; | 2849 | struct r5dev *dev, *pdev, *qdev; |
2815 | mdk_rdev_t *blocked_rdev = NULL; | 2850 | mdk_rdev_t *blocked_rdev = NULL; |
2816 | 2851 | ||
2817 | r6s.qd_idx = raid6_next_disk(pd_idx, disks); | 2852 | r6s.qd_idx = sh->qd_idx; |
2818 | pr_debug("handling stripe %llu, state=%#lx cnt=%d, " | 2853 | pr_debug("handling stripe %llu, state=%#lx cnt=%d, " |
2819 | "pd_idx=%d, qd_idx=%d\n", | 2854 | "pd_idx=%d, qd_idx=%d\n", |
2820 | (unsigned long long)sh->sector, sh->state, | 2855 | (unsigned long long)sh->sector, sh->state, |
@@ -2990,8 +3025,10 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) | |||
2990 | 3025 | ||
2991 | if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) { | 3026 | if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) { |
2992 | /* Need to write out all blocks after computing P&Q */ | 3027 | /* Need to write out all blocks after computing P&Q */ |
3028 | int qd_idx; | ||
2993 | sh->disks = conf->raid_disks; | 3029 | sh->disks = conf->raid_disks; |
2994 | sh->pd_idx = stripe_to_pdidx(sh->sector, conf, 0); | 3030 | sh->pd_idx = stripe_to_pdidx(sh->sector, conf, 0, &qd_idx); |
3031 | sh->qd_idx = qd_idx; | ||
2995 | compute_parity6(sh, RECONSTRUCT_WRITE); | 3032 | compute_parity6(sh, RECONSTRUCT_WRITE); |
2996 | for (i = conf->raid_disks ; i-- ; ) { | 3033 | for (i = conf->raid_disks ; i-- ; ) { |
2997 | set_bit(R5_LOCKED, &sh->dev[i].flags); | 3034 | set_bit(R5_LOCKED, &sh->dev[i].flags); |
@@ -3263,7 +3300,7 @@ static int chunk_aligned_read(struct request_queue *q, struct bio * raid_bio) | |||
3263 | { | 3300 | { |
3264 | mddev_t *mddev = q->queuedata; | 3301 | mddev_t *mddev = q->queuedata; |
3265 | raid5_conf_t *conf = mddev_to_conf(mddev); | 3302 | raid5_conf_t *conf = mddev_to_conf(mddev); |
3266 | unsigned int dd_idx, pd_idx; | 3303 | unsigned int dd_idx, pd_idx, qd_idx; |
3267 | struct bio* align_bi; | 3304 | struct bio* align_bi; |
3268 | mdk_rdev_t *rdev; | 3305 | mdk_rdev_t *rdev; |
3269 | 3306 | ||
@@ -3288,7 +3325,7 @@ static int chunk_aligned_read(struct request_queue *q, struct bio * raid_bio) | |||
3288 | */ | 3325 | */ |
3289 | align_bi->bi_sector = raid5_compute_sector(conf, raid_bio->bi_sector, | 3326 | align_bi->bi_sector = raid5_compute_sector(conf, raid_bio->bi_sector, |
3290 | 0, | 3327 | 0, |
3291 | &dd_idx, &pd_idx); | 3328 | &dd_idx, &pd_idx, &qd_idx); |
3292 | 3329 | ||
3293 | rcu_read_lock(); | 3330 | rcu_read_lock(); |
3294 | rdev = rcu_dereference(conf->disks[dd_idx].rdev); | 3331 | rdev = rcu_dereference(conf->disks[dd_idx].rdev); |
@@ -3380,7 +3417,7 @@ static int make_request(struct request_queue *q, struct bio * bi) | |||
3380 | { | 3417 | { |
3381 | mddev_t *mddev = q->queuedata; | 3418 | mddev_t *mddev = q->queuedata; |
3382 | raid5_conf_t *conf = mddev_to_conf(mddev); | 3419 | raid5_conf_t *conf = mddev_to_conf(mddev); |
3383 | unsigned int dd_idx, pd_idx; | 3420 | int dd_idx, pd_idx, qd_idx; |
3384 | sector_t new_sector; | 3421 | sector_t new_sector; |
3385 | sector_t logical_sector, last_sector; | 3422 | sector_t logical_sector, last_sector; |
3386 | struct stripe_head *sh; | 3423 | struct stripe_head *sh; |
@@ -3447,7 +3484,7 @@ static int make_request(struct request_queue *q, struct bio * bi) | |||
3447 | 3484 | ||
3448 | new_sector = raid5_compute_sector(conf, logical_sector, | 3485 | new_sector = raid5_compute_sector(conf, logical_sector, |
3449 | previous, | 3486 | previous, |
3450 | &dd_idx, &pd_idx); | 3487 | &dd_idx, &pd_idx, &qd_idx); |
3451 | pr_debug("raid5: make_request, sector %llu logical %llu\n", | 3488 | pr_debug("raid5: make_request, sector %llu logical %llu\n", |
3452 | (unsigned long long)new_sector, | 3489 | (unsigned long long)new_sector, |
3453 | (unsigned long long)logical_sector); | 3490 | (unsigned long long)logical_sector); |
@@ -3535,7 +3572,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped | |||
3535 | */ | 3572 | */ |
3536 | raid5_conf_t *conf = (raid5_conf_t *) mddev->private; | 3573 | raid5_conf_t *conf = (raid5_conf_t *) mddev->private; |
3537 | struct stripe_head *sh; | 3574 | struct stripe_head *sh; |
3538 | int pd_idx; | 3575 | int pd_idx, qd_idx; |
3539 | sector_t first_sector, last_sector; | 3576 | sector_t first_sector, last_sector; |
3540 | int raid_disks = conf->previous_raid_disks; | 3577 | int raid_disks = conf->previous_raid_disks; |
3541 | int data_disks = raid_disks - conf->max_degraded; | 3578 | int data_disks = raid_disks - conf->max_degraded; |
@@ -3598,7 +3635,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped | |||
3598 | if (j == sh->pd_idx) | 3635 | if (j == sh->pd_idx) |
3599 | continue; | 3636 | continue; |
3600 | if (conf->level == 6 && | 3637 | if (conf->level == 6 && |
3601 | j == raid6_next_disk(sh->pd_idx, sh->disks)) | 3638 | j == sh->qd_idx) |
3602 | continue; | 3639 | continue; |
3603 | s = compute_blocknr(sh, j); | 3640 | s = compute_blocknr(sh, j); |
3604 | if (s < mddev->array_sectors) { | 3641 | if (s < mddev->array_sectors) { |
@@ -3625,11 +3662,11 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped | |||
3625 | */ | 3662 | */ |
3626 | first_sector = | 3663 | first_sector = |
3627 | raid5_compute_sector(conf, sector_nr*(new_data_disks), | 3664 | raid5_compute_sector(conf, sector_nr*(new_data_disks), |
3628 | 1, &dd_idx, &pd_idx); | 3665 | 1, &dd_idx, &pd_idx, &qd_idx); |
3629 | last_sector = | 3666 | last_sector = |
3630 | raid5_compute_sector(conf, ((sector_nr+conf->chunk_size/512) | 3667 | raid5_compute_sector(conf, ((sector_nr+conf->chunk_size/512) |
3631 | *(new_data_disks) - 1), | 3668 | *(new_data_disks) - 1), |
3632 | 1, &dd_idx, &pd_idx); | 3669 | 1, &dd_idx, &pd_idx, &qd_idx); |
3633 | if (last_sector >= mddev->dev_sectors) | 3670 | if (last_sector >= mddev->dev_sectors) |
3634 | last_sector = mddev->dev_sectors - 1; | 3671 | last_sector = mddev->dev_sectors - 1; |
3635 | while (first_sector <= last_sector) { | 3672 | while (first_sector <= last_sector) { |
@@ -3764,7 +3801,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) | |||
3764 | * it will be only one 'dd_idx' and only need one call to raid5_compute_sector. | 3801 | * it will be only one 'dd_idx' and only need one call to raid5_compute_sector. |
3765 | */ | 3802 | */ |
3766 | struct stripe_head *sh; | 3803 | struct stripe_head *sh; |
3767 | int dd_idx, pd_idx; | 3804 | int dd_idx, pd_idx, qd_idx; |
3768 | sector_t sector, logical_sector, last_sector; | 3805 | sector_t sector, logical_sector, last_sector; |
3769 | int scnt = 0; | 3806 | int scnt = 0; |
3770 | int remaining; | 3807 | int remaining; |
@@ -3772,7 +3809,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) | |||
3772 | 3809 | ||
3773 | logical_sector = raid_bio->bi_sector & ~((sector_t)STRIPE_SECTORS-1); | 3810 | logical_sector = raid_bio->bi_sector & ~((sector_t)STRIPE_SECTORS-1); |
3774 | sector = raid5_compute_sector(conf, logical_sector, | 3811 | sector = raid5_compute_sector(conf, logical_sector, |
3775 | 0, &dd_idx, &pd_idx); | 3812 | 0, &dd_idx, &pd_idx, &qd_idx); |
3776 | last_sector = raid_bio->bi_sector + (raid_bio->bi_size>>9); | 3813 | last_sector = raid_bio->bi_sector + (raid_bio->bi_size>>9); |
3777 | 3814 | ||
3778 | for (; logical_sector < last_sector; | 3815 | for (; logical_sector < last_sector; |
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 0ed22dff56e0..0c7375ad12bd 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h | |||
@@ -196,15 +196,16 @@ enum reconstruct_states { | |||
196 | 196 | ||
197 | struct stripe_head { | 197 | struct stripe_head { |
198 | struct hlist_node hash; | 198 | struct hlist_node hash; |
199 | struct list_head lru; /* inactive_list or handle_list */ | 199 | struct list_head lru; /* inactive_list or handle_list */ |
200 | struct raid5_private_data *raid_conf; | 200 | struct raid5_private_data *raid_conf; |
201 | sector_t sector; /* sector of this row */ | 201 | sector_t sector; /* sector of this row */ |
202 | int pd_idx; /* parity disk index */ | 202 | short pd_idx; /* parity disk index */ |
203 | unsigned long state; /* state flags */ | 203 | short qd_idx; /* 'Q' disk index for raid6 */ |
204 | atomic_t count; /* nr of active thread/requests */ | 204 | unsigned long state; /* state flags */ |
205 | atomic_t count; /* nr of active thread/requests */ | ||
205 | spinlock_t lock; | 206 | spinlock_t lock; |
206 | int bm_seq; /* sequence number for bitmap flushes */ | 207 | int bm_seq; /* sequence number for bitmap flushes */ |
207 | int disks; /* disks in stripe */ | 208 | int disks; /* disks in stripe */ |
208 | enum check_states check_state; | 209 | enum check_states check_state; |
209 | enum reconstruct_states reconstruct_state; | 210 | enum reconstruct_states reconstruct_state; |
210 | /* stripe_operations | 211 | /* stripe_operations |