aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author NeilBrown <neilb@suse.de> 2009-03-30 23:39:38 -0400
committer NeilBrown <neilb@suse.de> 2009-03-30 23:39:38 -0400
commit 67cc2b8165857ba019920d1f00d64bcc4140075d (patch)
tree ff8e8eed440640acfa561160115ac44aedf811c2
parent 99c0fb5f92828ae96909d390f2df137b89093b37 (diff)
md/raid5: finish support for DDF/raid6
DDF requires RAID6 calculations over different devices in a different order. For md/raid6, we calculate over just the data devices, starting immediately after the 'Q' block. For ddf/raid6, we calculate over all devices, using zeros in place of the P and Q blocks. Unfortunately, this requires complex loops.

Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r-- drivers/md/raid5.c 58
-rw-r--r-- drivers/md/raid5.h 1
2 files changed, 41 insertions, 18 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index c1d94ed9718b..edbc80c4d346 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -136,6 +136,10 @@ static inline void raid5_set_bi_hw_segments(struct bio *bio, unsigned int cnt)
136/* Find first data disk in a raid6 stripe */ 136/* Find first data disk in a raid6 stripe */
137static inline int raid6_d0(struct stripe_head *sh) 137static inline int raid6_d0(struct stripe_head *sh)
138{ 138{
139 if (sh->ddf_layout)
140 /* ddf always start from first device */
141 return 0;
142 /* md starts just after Q block */
139 if (sh->qd_idx == sh->disks - 1) 143 if (sh->qd_idx == sh->disks - 1)
140 return 0; 144 return 0;
141 else 145 else
@@ -152,13 +156,15 @@ static inline int raid6_next_disk(int disk, int raid_disks)
152 * 0 .. raid_disks-3, the parity disk is raid_disks-2 and the Q disk 156 * 0 .. raid_disks-3, the parity disk is raid_disks-2 and the Q disk
153 * is raid_disks-1. This help does that mapping. 157 * is raid_disks-1. This help does that mapping.
154 */ 158 */
155static int raid6_idx_to_slot(int idx, struct stripe_head *sh, int *count) 159static int raid6_idx_to_slot(int idx, struct stripe_head *sh,
160 int *count, int syndrome_disks)
156{ 161{
157 int slot; 162 int slot;
163
158 if (idx == sh->pd_idx) 164 if (idx == sh->pd_idx)
159 return sh->disks - 2; 165 return syndrome_disks;
160 if (idx == sh->qd_idx) 166 if (idx == sh->qd_idx)
161 return sh->disks - 1; 167 return syndrome_disks + 1;
162 slot = (*count)++; 168 slot = (*count)++;
163 return slot; 169 return slot;
164} 170}
@@ -1267,6 +1273,7 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector,
1267 unsigned long chunk_number; 1273 unsigned long chunk_number;
1268 unsigned int chunk_offset; 1274 unsigned int chunk_offset;
1269 int pd_idx, qd_idx; 1275 int pd_idx, qd_idx;
1276 int ddf_layout = 0;
1270 sector_t new_sector; 1277 sector_t new_sector;
1271 int sectors_per_chunk = conf->chunk_size >> 9; 1278 int sectors_per_chunk = conf->chunk_size >> 9;
1272 int raid_disks = previous ? conf->previous_raid_disks 1279 int raid_disks = previous ? conf->previous_raid_disks
@@ -1386,6 +1393,7 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector,
1386 qd_idx = 0; 1393 qd_idx = 0;
1387 } else if (*dd_idx >= pd_idx) 1394 } else if (*dd_idx >= pd_idx)
1388 (*dd_idx) += 2; /* D D P Q D */ 1395 (*dd_idx) += 2; /* D D P Q D */
1396 ddf_layout = 1;
1389 break; 1397 break;
1390 1398
1391 case ALGORITHM_ROTATING_N_RESTART: 1399 case ALGORITHM_ROTATING_N_RESTART:
@@ -1400,6 +1408,7 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector,
1400 qd_idx = 0; 1408 qd_idx = 0;
1401 } else if (*dd_idx >= pd_idx) 1409 } else if (*dd_idx >= pd_idx)
1402 (*dd_idx) += 2; /* D D P Q D */ 1410 (*dd_idx) += 2; /* D D P Q D */
1411 ddf_layout = 1;
1403 break; 1412 break;
1404 1413
1405 case ALGORITHM_ROTATING_N_CONTINUE: 1414 case ALGORITHM_ROTATING_N_CONTINUE:
@@ -1407,6 +1416,7 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector,
1407 pd_idx = raid_disks - 1 - (stripe % raid_disks); 1416 pd_idx = raid_disks - 1 - (stripe % raid_disks);
1408 qd_idx = (pd_idx + raid_disks - 1) % raid_disks; 1417 qd_idx = (pd_idx + raid_disks - 1) % raid_disks;
1409 *dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks; 1418 *dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks;
1419 ddf_layout = 1;
1410 break; 1420 break;
1411 1421
1412 case ALGORITHM_LEFT_ASYMMETRIC_6: 1422 case ALGORITHM_LEFT_ASYMMETRIC_6:
@@ -1454,6 +1464,7 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector,
1454 if (sh) { 1464 if (sh) {
1455 sh->pd_idx = pd_idx; 1465 sh->pd_idx = pd_idx;
1456 sh->qd_idx = qd_idx; 1466 sh->qd_idx = qd_idx;
1467 sh->ddf_layout = ddf_layout;
1457 } 1468 }
1458 /* 1469 /*
1459 * Finally, compute the new sector number 1470 * Finally, compute the new sector number
@@ -1642,9 +1653,10 @@ static void compute_parity6(struct stripe_head *sh, int method)
1642{ 1653{
1643 raid5_conf_t *conf = sh->raid_conf; 1654 raid5_conf_t *conf = sh->raid_conf;
1644 int i, pd_idx, qd_idx, d0_idx, disks = sh->disks, count; 1655 int i, pd_idx, qd_idx, d0_idx, disks = sh->disks, count;
1656 int syndrome_disks = sh->ddf_layout ? disks : (disks - 2);
1645 struct bio *chosen; 1657 struct bio *chosen;
1646 /**** FIX THIS: This could be very bad if disks is close to 256 ****/ 1658 /**** FIX THIS: This could be very bad if disks is close to 256 ****/
1647 void *ptrs[disks]; 1659 void *ptrs[syndrome_disks+2];
1648 1660
1649 pd_idx = sh->pd_idx; 1661 pd_idx = sh->pd_idx;
1650 qd_idx = sh->qd_idx; 1662 qd_idx = sh->qd_idx;
@@ -1687,23 +1699,28 @@ static void compute_parity6(struct stripe_head *sh, int method)
1687 } 1699 }
1688 1700
1689 /* Note that unlike RAID-5, the ordering of the disks matters greatly.*/ 1701 /* Note that unlike RAID-5, the ordering of the disks matters greatly.*/
1690 /* FIX: Is this ordering of drives even remotely optimal? */ 1702
1703 for (i = 0; i < disks; i++)
1704 ptrs[i] = (void *)raid6_empty_zero_page;
1705
1691 count = 0; 1706 count = 0;
1692 i = d0_idx; 1707 i = d0_idx;
1693 do { 1708 do {
1694 int slot = raid6_idx_to_slot(i, sh, &count); 1709 int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
1710
1695 ptrs[slot] = page_address(sh->dev[i].page); 1711 ptrs[slot] = page_address(sh->dev[i].page);
1696 if (slot < sh->disks - 2 && 1712 if (slot < syndrome_disks &&
1697 !test_bit(R5_UPTODATE, &sh->dev[i].flags)) { 1713 !test_bit(R5_UPTODATE, &sh->dev[i].flags)) {
1698 printk(KERN_ERR "block %d/%d not uptodate " 1714 printk(KERN_ERR "block %d/%d not uptodate "
1699 "on parity calc\n", i, count); 1715 "on parity calc\n", i, count);
1700 BUG(); 1716 BUG();
1701 } 1717 }
1718
1702 i = raid6_next_disk(i, disks); 1719 i = raid6_next_disk(i, disks);
1703 } while (i != d0_idx); 1720 } while (i != d0_idx);
1704 BUG_ON(count+2 != disks); 1721 BUG_ON(count != syndrome_disks);
1705 1722
1706 raid6_call.gen_syndrome(disks, STRIPE_SIZE, ptrs); 1723 raid6_call.gen_syndrome(syndrome_disks+2, STRIPE_SIZE, ptrs);
1707 1724
1708 switch(method) { 1725 switch(method) {
1709 case RECONSTRUCT_WRITE: 1726 case RECONSTRUCT_WRITE:
@@ -1761,24 +1778,28 @@ static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero)
1761static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2) 1778static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2)
1762{ 1779{
1763 int i, count, disks = sh->disks; 1780 int i, count, disks = sh->disks;
1781 int syndrome_disks = sh->ddf_layout ? disks : disks-2;
1764 int d0_idx = raid6_d0(sh); 1782 int d0_idx = raid6_d0(sh);
1765 int faila = -1, failb = -1; 1783 int faila = -1, failb = -1;
1766 /**** FIX THIS: This could be very bad if disks is close to 256 ****/ 1784 /**** FIX THIS: This could be very bad if disks is close to 256 ****/
1767 void *ptrs[disks]; 1785 void *ptrs[syndrome_disks+2];
1768 1786
1787 for (i = 0; i < disks ; i++)
1788 ptrs[i] = (void *)raid6_empty_zero_page;
1769 count = 0; 1789 count = 0;
1770 i = d0_idx; 1790 i = d0_idx;
1771 do { 1791 do {
1772 int slot; 1792 int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
1773 slot = raid6_idx_to_slot(i, sh, &count); 1793
1774 ptrs[slot] = page_address(sh->dev[i].page); 1794 ptrs[slot] = page_address(sh->dev[i].page);
1795
1775 if (i == dd_idx1) 1796 if (i == dd_idx1)
1776 faila = slot; 1797 faila = slot;
1777 if (i == dd_idx2) 1798 if (i == dd_idx2)
1778 failb = slot; 1799 failb = slot;
1779 i = raid6_next_disk(i, disks); 1800 i = raid6_next_disk(i, disks);
1780 } while (i != d0_idx); 1801 } while (i != d0_idx);
1781 BUG_ON(count+2 != disks); 1802 BUG_ON(count != syndrome_disks);
1782 1803
1783 BUG_ON(faila == failb); 1804 BUG_ON(faila == failb);
1784 if ( failb < faila ) { int tmp = faila; faila = failb; failb = tmp; } 1805 if ( failb < faila ) { int tmp = faila; faila = failb; failb = tmp; }
@@ -1787,9 +1808,9 @@ static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2)
1787 (unsigned long long)sh->sector, dd_idx1, dd_idx2, 1808 (unsigned long long)sh->sector, dd_idx1, dd_idx2,
1788 faila, failb); 1809 faila, failb);
1789 1810
1790 if ( failb == disks-1 ) { 1811 if (failb == syndrome_disks+1) {
1791 /* Q disk is one of the missing disks */ 1812 /* Q disk is one of the missing disks */
1792 if ( faila == disks-2 ) { 1813 if (faila == syndrome_disks) {
1793 /* Missing P+Q, just recompute */ 1814 /* Missing P+Q, just recompute */
1794 compute_parity6(sh, UPDATE_PARITY); 1815 compute_parity6(sh, UPDATE_PARITY);
1795 return; 1816 return;
@@ -1804,12 +1825,13 @@ static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2)
1804 } 1825 }
1805 1826
1806 /* We're missing D+P or D+D; */ 1827 /* We're missing D+P or D+D; */
1807 if (failb == disks-2) { 1828 if (failb == syndrome_disks) {
1808 /* We're missing D+P. */ 1829 /* We're missing D+P. */
1809 raid6_datap_recov(disks, STRIPE_SIZE, faila, ptrs); 1830 raid6_datap_recov(syndrome_disks+2, STRIPE_SIZE, faila, ptrs);
1810 } else { 1831 } else {
1811 /* We're missing D+D. */ 1832 /* We're missing D+D. */
1812 raid6_2data_recov(disks, STRIPE_SIZE, faila, failb, ptrs); 1833 raid6_2data_recov(syndrome_disks+2, STRIPE_SIZE, faila, failb,
1834 ptrs);
1813 } 1835 }
1814 1836
1815 /* Both the above update both missing blocks */ 1837 /* Both the above update both missing blocks */
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 633d79289616..84456b1af204 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -201,6 +201,7 @@ struct stripe_head {
201 sector_t sector; /* sector of this row */ 201 sector_t sector; /* sector of this row */
202 short pd_idx; /* parity disk index */ 202 short pd_idx; /* parity disk index */
203 short qd_idx; /* 'Q' disk index for raid6 */ 203 short qd_idx; /* 'Q' disk index for raid6 */
204 short ddf_layout;/* use DDF ordering to calculate Q */
204 unsigned long state; /* state flags */ 205 unsigned long state; /* state flags */
205 atomic_t count; /* nr of active thread/requests */ 206 atomic_t count; /* nr of active thread/requests */
206 spinlock_t lock; 207 spinlock_t lock;