diff options
author | NeilBrown <neilb@suse.de> | 2009-03-30 23:39:38 -0400 |
---|---|---|
committer | NeilBrown <neilb@suse.de> | 2009-03-30 23:39:38 -0400 |
commit | 67cc2b8165857ba019920d1f00d64bcc4140075d (patch) | |
tree | ff8e8eed440640acfa561160115ac44aedf811c2 | |
parent | 99c0fb5f92828ae96909d390f2df137b89093b37 (diff) |
md/raid5: finish support for DDF/raid6
DDF requires RAID6 calculations over different devices in a different
order.
For md/raid6, we calculate over just the data devices, starting
immediately after the 'Q' block.
For ddf/raid6 we calculate over all devices, using zeros in place of
the P and Q blocks.
This requires unfortunately complex loops...
Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r-- | drivers/md/raid5.c | 58 | ||||
-rw-r--r-- | drivers/md/raid5.h | 1 |
2 files changed, 41 insertions, 18 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index c1d94ed9718b..edbc80c4d346 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -136,6 +136,10 @@ static inline void raid5_set_bi_hw_segments(struct bio *bio, unsigned int cnt) | |||
136 | /* Find first data disk in a raid6 stripe */ | 136 | /* Find first data disk in a raid6 stripe */ |
137 | static inline int raid6_d0(struct stripe_head *sh) | 137 | static inline int raid6_d0(struct stripe_head *sh) |
138 | { | 138 | { |
139 | if (sh->ddf_layout) | ||
140 | /* ddf always start from first device */ | ||
141 | return 0; | ||
142 | /* md starts just after Q block */ | ||
139 | if (sh->qd_idx == sh->disks - 1) | 143 | if (sh->qd_idx == sh->disks - 1) |
140 | return 0; | 144 | return 0; |
141 | else | 145 | else |
@@ -152,13 +156,15 @@ static inline int raid6_next_disk(int disk, int raid_disks) | |||
152 | * 0 .. raid_disks-3, the parity disk is raid_disks-2 and the Q disk | 156 | * 0 .. raid_disks-3, the parity disk is raid_disks-2 and the Q disk |
153 | * is raid_disks-1. This help does that mapping. | 157 | * is raid_disks-1. This help does that mapping. |
154 | */ | 158 | */ |
155 | static int raid6_idx_to_slot(int idx, struct stripe_head *sh, int *count) | 159 | static int raid6_idx_to_slot(int idx, struct stripe_head *sh, |
160 | int *count, int syndrome_disks) | ||
156 | { | 161 | { |
157 | int slot; | 162 | int slot; |
163 | |||
158 | if (idx == sh->pd_idx) | 164 | if (idx == sh->pd_idx) |
159 | return sh->disks - 2; | 165 | return syndrome_disks; |
160 | if (idx == sh->qd_idx) | 166 | if (idx == sh->qd_idx) |
161 | return sh->disks - 1; | 167 | return syndrome_disks + 1; |
162 | slot = (*count)++; | 168 | slot = (*count)++; |
163 | return slot; | 169 | return slot; |
164 | } | 170 | } |
@@ -1267,6 +1273,7 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector, | |||
1267 | unsigned long chunk_number; | 1273 | unsigned long chunk_number; |
1268 | unsigned int chunk_offset; | 1274 | unsigned int chunk_offset; |
1269 | int pd_idx, qd_idx; | 1275 | int pd_idx, qd_idx; |
1276 | int ddf_layout = 0; | ||
1270 | sector_t new_sector; | 1277 | sector_t new_sector; |
1271 | int sectors_per_chunk = conf->chunk_size >> 9; | 1278 | int sectors_per_chunk = conf->chunk_size >> 9; |
1272 | int raid_disks = previous ? conf->previous_raid_disks | 1279 | int raid_disks = previous ? conf->previous_raid_disks |
@@ -1386,6 +1393,7 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector, | |||
1386 | qd_idx = 0; | 1393 | qd_idx = 0; |
1387 | } else if (*dd_idx >= pd_idx) | 1394 | } else if (*dd_idx >= pd_idx) |
1388 | (*dd_idx) += 2; /* D D P Q D */ | 1395 | (*dd_idx) += 2; /* D D P Q D */ |
1396 | ddf_layout = 1; | ||
1389 | break; | 1397 | break; |
1390 | 1398 | ||
1391 | case ALGORITHM_ROTATING_N_RESTART: | 1399 | case ALGORITHM_ROTATING_N_RESTART: |
@@ -1400,6 +1408,7 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector, | |||
1400 | qd_idx = 0; | 1408 | qd_idx = 0; |
1401 | } else if (*dd_idx >= pd_idx) | 1409 | } else if (*dd_idx >= pd_idx) |
1402 | (*dd_idx) += 2; /* D D P Q D */ | 1410 | (*dd_idx) += 2; /* D D P Q D */ |
1411 | ddf_layout = 1; | ||
1403 | break; | 1412 | break; |
1404 | 1413 | ||
1405 | case ALGORITHM_ROTATING_N_CONTINUE: | 1414 | case ALGORITHM_ROTATING_N_CONTINUE: |
@@ -1407,6 +1416,7 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector, | |||
1407 | pd_idx = raid_disks - 1 - (stripe % raid_disks); | 1416 | pd_idx = raid_disks - 1 - (stripe % raid_disks); |
1408 | qd_idx = (pd_idx + raid_disks - 1) % raid_disks; | 1417 | qd_idx = (pd_idx + raid_disks - 1) % raid_disks; |
1409 | *dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks; | 1418 | *dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks; |
1419 | ddf_layout = 1; | ||
1410 | break; | 1420 | break; |
1411 | 1421 | ||
1412 | case ALGORITHM_LEFT_ASYMMETRIC_6: | 1422 | case ALGORITHM_LEFT_ASYMMETRIC_6: |
@@ -1454,6 +1464,7 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector, | |||
1454 | if (sh) { | 1464 | if (sh) { |
1455 | sh->pd_idx = pd_idx; | 1465 | sh->pd_idx = pd_idx; |
1456 | sh->qd_idx = qd_idx; | 1466 | sh->qd_idx = qd_idx; |
1467 | sh->ddf_layout = ddf_layout; | ||
1457 | } | 1468 | } |
1458 | /* | 1469 | /* |
1459 | * Finally, compute the new sector number | 1470 | * Finally, compute the new sector number |
@@ -1642,9 +1653,10 @@ static void compute_parity6(struct stripe_head *sh, int method) | |||
1642 | { | 1653 | { |
1643 | raid5_conf_t *conf = sh->raid_conf; | 1654 | raid5_conf_t *conf = sh->raid_conf; |
1644 | int i, pd_idx, qd_idx, d0_idx, disks = sh->disks, count; | 1655 | int i, pd_idx, qd_idx, d0_idx, disks = sh->disks, count; |
1656 | int syndrome_disks = sh->ddf_layout ? disks : (disks - 2); | ||
1645 | struct bio *chosen; | 1657 | struct bio *chosen; |
1646 | /**** FIX THIS: This could be very bad if disks is close to 256 ****/ | 1658 | /**** FIX THIS: This could be very bad if disks is close to 256 ****/ |
1647 | void *ptrs[disks]; | 1659 | void *ptrs[syndrome_disks+2]; |
1648 | 1660 | ||
1649 | pd_idx = sh->pd_idx; | 1661 | pd_idx = sh->pd_idx; |
1650 | qd_idx = sh->qd_idx; | 1662 | qd_idx = sh->qd_idx; |
@@ -1687,23 +1699,28 @@ static void compute_parity6(struct stripe_head *sh, int method) | |||
1687 | } | 1699 | } |
1688 | 1700 | ||
1689 | /* Note that unlike RAID-5, the ordering of the disks matters greatly.*/ | 1701 | /* Note that unlike RAID-5, the ordering of the disks matters greatly.*/ |
1690 | /* FIX: Is this ordering of drives even remotely optimal? */ | 1702 | |
1703 | for (i = 0; i < disks; i++) | ||
1704 | ptrs[i] = (void *)raid6_empty_zero_page; | ||
1705 | |||
1691 | count = 0; | 1706 | count = 0; |
1692 | i = d0_idx; | 1707 | i = d0_idx; |
1693 | do { | 1708 | do { |
1694 | int slot = raid6_idx_to_slot(i, sh, &count); | 1709 | int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks); |
1710 | |||
1695 | ptrs[slot] = page_address(sh->dev[i].page); | 1711 | ptrs[slot] = page_address(sh->dev[i].page); |
1696 | if (slot < sh->disks - 2 && | 1712 | if (slot < syndrome_disks && |
1697 | !test_bit(R5_UPTODATE, &sh->dev[i].flags)) { | 1713 | !test_bit(R5_UPTODATE, &sh->dev[i].flags)) { |
1698 | printk(KERN_ERR "block %d/%d not uptodate " | 1714 | printk(KERN_ERR "block %d/%d not uptodate " |
1699 | "on parity calc\n", i, count); | 1715 | "on parity calc\n", i, count); |
1700 | BUG(); | 1716 | BUG(); |
1701 | } | 1717 | } |
1718 | |||
1702 | i = raid6_next_disk(i, disks); | 1719 | i = raid6_next_disk(i, disks); |
1703 | } while (i != d0_idx); | 1720 | } while (i != d0_idx); |
1704 | BUG_ON(count+2 != disks); | 1721 | BUG_ON(count != syndrome_disks); |
1705 | 1722 | ||
1706 | raid6_call.gen_syndrome(disks, STRIPE_SIZE, ptrs); | 1723 | raid6_call.gen_syndrome(syndrome_disks+2, STRIPE_SIZE, ptrs); |
1707 | 1724 | ||
1708 | switch(method) { | 1725 | switch(method) { |
1709 | case RECONSTRUCT_WRITE: | 1726 | case RECONSTRUCT_WRITE: |
@@ -1761,24 +1778,28 @@ static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero) | |||
1761 | static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2) | 1778 | static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2) |
1762 | { | 1779 | { |
1763 | int i, count, disks = sh->disks; | 1780 | int i, count, disks = sh->disks; |
1781 | int syndrome_disks = sh->ddf_layout ? disks : disks-2; | ||
1764 | int d0_idx = raid6_d0(sh); | 1782 | int d0_idx = raid6_d0(sh); |
1765 | int faila = -1, failb = -1; | 1783 | int faila = -1, failb = -1; |
1766 | /**** FIX THIS: This could be very bad if disks is close to 256 ****/ | 1784 | /**** FIX THIS: This could be very bad if disks is close to 256 ****/ |
1767 | void *ptrs[disks]; | 1785 | void *ptrs[syndrome_disks+2]; |
1768 | 1786 | ||
1787 | for (i = 0; i < disks ; i++) | ||
1788 | ptrs[i] = (void *)raid6_empty_zero_page; | ||
1769 | count = 0; | 1789 | count = 0; |
1770 | i = d0_idx; | 1790 | i = d0_idx; |
1771 | do { | 1791 | do { |
1772 | int slot; | 1792 | int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks); |
1773 | slot = raid6_idx_to_slot(i, sh, &count); | 1793 | |
1774 | ptrs[slot] = page_address(sh->dev[i].page); | 1794 | ptrs[slot] = page_address(sh->dev[i].page); |
1795 | |||
1775 | if (i == dd_idx1) | 1796 | if (i == dd_idx1) |
1776 | faila = slot; | 1797 | faila = slot; |
1777 | if (i == dd_idx2) | 1798 | if (i == dd_idx2) |
1778 | failb = slot; | 1799 | failb = slot; |
1779 | i = raid6_next_disk(i, disks); | 1800 | i = raid6_next_disk(i, disks); |
1780 | } while (i != d0_idx); | 1801 | } while (i != d0_idx); |
1781 | BUG_ON(count+2 != disks); | 1802 | BUG_ON(count != syndrome_disks); |
1782 | 1803 | ||
1783 | BUG_ON(faila == failb); | 1804 | BUG_ON(faila == failb); |
1784 | if ( failb < faila ) { int tmp = faila; faila = failb; failb = tmp; } | 1805 | if ( failb < faila ) { int tmp = faila; faila = failb; failb = tmp; } |
@@ -1787,9 +1808,9 @@ static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2) | |||
1787 | (unsigned long long)sh->sector, dd_idx1, dd_idx2, | 1808 | (unsigned long long)sh->sector, dd_idx1, dd_idx2, |
1788 | faila, failb); | 1809 | faila, failb); |
1789 | 1810 | ||
1790 | if ( failb == disks-1 ) { | 1811 | if (failb == syndrome_disks+1) { |
1791 | /* Q disk is one of the missing disks */ | 1812 | /* Q disk is one of the missing disks */ |
1792 | if ( faila == disks-2 ) { | 1813 | if (faila == syndrome_disks) { |
1793 | /* Missing P+Q, just recompute */ | 1814 | /* Missing P+Q, just recompute */ |
1794 | compute_parity6(sh, UPDATE_PARITY); | 1815 | compute_parity6(sh, UPDATE_PARITY); |
1795 | return; | 1816 | return; |
@@ -1804,12 +1825,13 @@ static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2) | |||
1804 | } | 1825 | } |
1805 | 1826 | ||
1806 | /* We're missing D+P or D+D; */ | 1827 | /* We're missing D+P or D+D; */ |
1807 | if (failb == disks-2) { | 1828 | if (failb == syndrome_disks) { |
1808 | /* We're missing D+P. */ | 1829 | /* We're missing D+P. */ |
1809 | raid6_datap_recov(disks, STRIPE_SIZE, faila, ptrs); | 1830 | raid6_datap_recov(syndrome_disks+2, STRIPE_SIZE, faila, ptrs); |
1810 | } else { | 1831 | } else { |
1811 | /* We're missing D+D. */ | 1832 | /* We're missing D+D. */ |
1812 | raid6_2data_recov(disks, STRIPE_SIZE, faila, failb, ptrs); | 1833 | raid6_2data_recov(syndrome_disks+2, STRIPE_SIZE, faila, failb, |
1834 | ptrs); | ||
1813 | } | 1835 | } |
1814 | 1836 | ||
1815 | /* Both the above update both missing blocks */ | 1837 | /* Both the above update both missing blocks */ |
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 633d79289616..84456b1af204 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -201,6 +201,7 @@ struct stripe_head { | |||
201 | sector_t sector; /* sector of this row */ | 201 | sector_t sector; /* sector of this row */ |
202 | short pd_idx; /* parity disk index */ | 202 | short pd_idx; /* parity disk index */ |
203 | short qd_idx; /* 'Q' disk index for raid6 */ | 203 | short qd_idx; /* 'Q' disk index for raid6 */ |
204 | short ddf_layout;/* use DDF ordering to calculate Q */ | ||
204 | unsigned long state; /* state flags */ | 205 | unsigned long state; /* state flags */ |
205 | atomic_t count; /* nr of active thread/requests */ | 206 | atomic_t count; /* nr of active thread/requests */ |
206 | spinlock_t lock; | 207 | spinlock_t lock; |