aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.de>2009-03-30 23:39:38 -0400
committerNeilBrown <neilb@suse.de>2009-03-30 23:39:38 -0400
commit99c0fb5f92828ae96909d390f2df137b89093b37 (patch)
tree67757972da005990d619b810c3b75fb8b6c9969f
parent911d4ee8536d89ea8a6cd3e96b1c95a3ebc5ea66 (diff)
md/raid5: Add support for new layouts for raid5 and raid6.
DDF uses different layouts for P and Q blocks than current md/raid6 so add those that are missing. Also add support for RAID6 layouts that are identical to various raid5 layouts with the simple addition of one device to hold all of the 'Q' blocks. Finally add 'raid5' layouts to match raid4. These last two will allow online level conversion. Note that this does not provide correct support for DDF/raid6 yet as the order in which data blocks are summed to produce the Q block is significant and different between current md code and DDF requirements. Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r--drivers/md/raid5.c151
-rw-r--r--drivers/md/raid5.h61
2 files changed, 193 insertions, 19 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 2e2e64f6ef71..c1d94ed9718b 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1098,7 +1098,7 @@ static void shrink_stripes(raid5_conf_t *conf)
1098 1098
1099static void raid5_end_read_request(struct bio * bi, int error) 1099static void raid5_end_read_request(struct bio * bi, int error)
1100{ 1100{
1101 struct stripe_head *sh = bi->bi_private; 1101 struct stripe_head *sh = bi->bi_private;
1102 raid5_conf_t *conf = sh->raid_conf; 1102 raid5_conf_t *conf = sh->raid_conf;
1103 int disks = sh->disks, i; 1103 int disks = sh->disks, i;
1104 int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags); 1104 int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
@@ -1180,7 +1180,7 @@ static void raid5_end_read_request(struct bio * bi, int error)
1180 1180
1181static void raid5_end_write_request(struct bio *bi, int error) 1181static void raid5_end_write_request(struct bio *bi, int error)
1182{ 1182{
1183 struct stripe_head *sh = bi->bi_private; 1183 struct stripe_head *sh = bi->bi_private;
1184 raid5_conf_t *conf = sh->raid_conf; 1184 raid5_conf_t *conf = sh->raid_conf;
1185 int disks = sh->disks, i; 1185 int disks = sh->disks, i;
1186 int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags); 1186 int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
@@ -1320,20 +1320,27 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector,
1320 pd_idx = stripe % raid_disks; 1320 pd_idx = stripe % raid_disks;
1321 *dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks; 1321 *dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks;
1322 break; 1322 break;
1323 case ALGORITHM_PARITY_0:
1324 pd_idx = 0;
1325 (*dd_idx)++;
1326 break;
1327 case ALGORITHM_PARITY_N:
1328 pd_idx = data_disks;
1329 break;
1323 default: 1330 default:
1324 printk(KERN_ERR "raid5: unsupported algorithm %d\n", 1331 printk(KERN_ERR "raid5: unsupported algorithm %d\n",
1325 conf->algorithm); 1332 conf->algorithm);
1333 BUG();
1326 } 1334 }
1327 break; 1335 break;
1328 case 6: 1336 case 6:
1329 1337
1330 /**** FIX THIS ****/
1331 switch (conf->algorithm) { 1338 switch (conf->algorithm) {
1332 case ALGORITHM_LEFT_ASYMMETRIC: 1339 case ALGORITHM_LEFT_ASYMMETRIC:
1333 pd_idx = raid_disks - 1 - (stripe % raid_disks); 1340 pd_idx = raid_disks - 1 - (stripe % raid_disks);
1334 qd_idx = pd_idx + 1; 1341 qd_idx = pd_idx + 1;
1335 if (pd_idx == raid_disks-1) { 1342 if (pd_idx == raid_disks-1) {
1336 (*dd_idx)++; /* Q D D D P */ 1343 (*dd_idx)++; /* Q D D D P */
1337 qd_idx = 0; 1344 qd_idx = 0;
1338 } else if (*dd_idx >= pd_idx) 1345 } else if (*dd_idx >= pd_idx)
1339 (*dd_idx) += 2; /* D D P Q D */ 1346 (*dd_idx) += 2; /* D D P Q D */
@@ -1342,7 +1349,7 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector,
1342 pd_idx = stripe % raid_disks; 1349 pd_idx = stripe % raid_disks;
1343 qd_idx = pd_idx + 1; 1350 qd_idx = pd_idx + 1;
1344 if (pd_idx == raid_disks-1) { 1351 if (pd_idx == raid_disks-1) {
1345 (*dd_idx)++; /* Q D D D P */ 1352 (*dd_idx)++; /* Q D D D P */
1346 qd_idx = 0; 1353 qd_idx = 0;
1347 } else if (*dd_idx >= pd_idx) 1354 } else if (*dd_idx >= pd_idx)
1348 (*dd_idx) += 2; /* D D P Q D */ 1355 (*dd_idx) += 2; /* D D P Q D */
@@ -1357,9 +1364,89 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector,
1357 qd_idx = (pd_idx + 1) % raid_disks; 1364 qd_idx = (pd_idx + 1) % raid_disks;
1358 *dd_idx = (pd_idx + 2 + *dd_idx) % raid_disks; 1365 *dd_idx = (pd_idx + 2 + *dd_idx) % raid_disks;
1359 break; 1366 break;
1367
1368 case ALGORITHM_PARITY_0:
1369 pd_idx = 0;
1370 qd_idx = 1;
1371 (*dd_idx) += 2;
1372 break;
1373 case ALGORITHM_PARITY_N:
1374 pd_idx = data_disks;
1375 qd_idx = data_disks + 1;
1376 break;
1377
1378 case ALGORITHM_ROTATING_ZERO_RESTART:
1379 /* Exactly the same as RIGHT_ASYMMETRIC, but order
1380 * of blocks for computing Q is different.
1381 */
1382 pd_idx = stripe % raid_disks;
1383 qd_idx = pd_idx + 1;
1384 if (pd_idx == raid_disks-1) {
1385 (*dd_idx)++; /* Q D D D P */
1386 qd_idx = 0;
1387 } else if (*dd_idx >= pd_idx)
1388 (*dd_idx) += 2; /* D D P Q D */
1389 break;
1390
1391 case ALGORITHM_ROTATING_N_RESTART:
1392 /* Same as left_asymmetric, but first stripe is
1393 * D D D P Q rather than
1394 * Q D D D P
1395 */
1396 pd_idx = raid_disks - 1 - ((stripe + 1) % raid_disks);
1397 qd_idx = pd_idx + 1;
1398 if (pd_idx == raid_disks-1) {
1399 (*dd_idx)++; /* Q D D D P */
1400 qd_idx = 0;
1401 } else if (*dd_idx >= pd_idx)
1402 (*dd_idx) += 2; /* D D P Q D */
1403 break;
1404
1405 case ALGORITHM_ROTATING_N_CONTINUE:
1406 /* Same as left_symmetric but Q is before P */
1407 pd_idx = raid_disks - 1 - (stripe % raid_disks);
1408 qd_idx = (pd_idx + raid_disks - 1) % raid_disks;
1409 *dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks;
1410 break;
1411
1412 case ALGORITHM_LEFT_ASYMMETRIC_6:
1413 /* RAID5 left_asymmetric, with Q on last device */
1414 pd_idx = data_disks - stripe % (raid_disks-1);
1415 if (*dd_idx >= pd_idx)
1416 (*dd_idx)++;
1417 qd_idx = raid_disks - 1;
1418 break;
1419
1420 case ALGORITHM_RIGHT_ASYMMETRIC_6:
1421 pd_idx = stripe % (raid_disks-1);
1422 if (*dd_idx >= pd_idx)
1423 (*dd_idx)++;
1424 qd_idx = raid_disks - 1;
1425 break;
1426
1427 case ALGORITHM_LEFT_SYMMETRIC_6:
1428 pd_idx = data_disks - stripe % (raid_disks-1);
1429 *dd_idx = (pd_idx + 1 + *dd_idx) % (raid_disks-1);
1430 qd_idx = raid_disks - 1;
1431 break;
1432
1433 case ALGORITHM_RIGHT_SYMMETRIC_6:
1434 pd_idx = stripe % (raid_disks-1);
1435 *dd_idx = (pd_idx + 1 + *dd_idx) % (raid_disks-1);
1436 qd_idx = raid_disks - 1;
1437 break;
1438
1439 case ALGORITHM_PARITY_0_6:
1440 pd_idx = 0;
1441 (*dd_idx)++;
1442 qd_idx = raid_disks - 1;
1443 break;
1444
1445
1360 default: 1446 default:
1361 printk(KERN_CRIT "raid6: unsupported algorithm %d\n", 1447 printk(KERN_CRIT "raid6: unsupported algorithm %d\n",
1362 conf->algorithm); 1448 conf->algorithm);
1449 BUG();
1363 } 1450 }
1364 break; 1451 break;
1365 } 1452 }
@@ -1411,9 +1498,15 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i)
1411 i += raid_disks; 1498 i += raid_disks;
1412 i -= (sh->pd_idx + 1); 1499 i -= (sh->pd_idx + 1);
1413 break; 1500 break;
1501 case ALGORITHM_PARITY_0:
1502 i -= 1;
1503 break;
1504 case ALGORITHM_PARITY_N:
1505 break;
1414 default: 1506 default:
1415 printk(KERN_ERR "raid5: unsupported algorithm %d\n", 1507 printk(KERN_ERR "raid5: unsupported algorithm %d\n",
1416 conf->algorithm); 1508 conf->algorithm);
1509 BUG();
1417 } 1510 }
1418 break; 1511 break;
1419 case 6: 1512 case 6:
@@ -1422,8 +1515,10 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i)
1422 switch (conf->algorithm) { 1515 switch (conf->algorithm) {
1423 case ALGORITHM_LEFT_ASYMMETRIC: 1516 case ALGORITHM_LEFT_ASYMMETRIC:
1424 case ALGORITHM_RIGHT_ASYMMETRIC: 1517 case ALGORITHM_RIGHT_ASYMMETRIC:
1425 if (sh->pd_idx == raid_disks-1) 1518 case ALGORITHM_ROTATING_ZERO_RESTART:
1426 i--; /* Q D D D P */ 1519 case ALGORITHM_ROTATING_N_RESTART:
1520 if (sh->pd_idx == raid_disks-1)
1521 i--; /* Q D D D P */
1427 else if (i > sh->pd_idx) 1522 else if (i > sh->pd_idx)
1428 i -= 2; /* D D P Q D */ 1523 i -= 2; /* D D P Q D */
1429 break; 1524 break;
@@ -1438,9 +1533,35 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i)
1438 i -= (sh->pd_idx + 2); 1533 i -= (sh->pd_idx + 2);
1439 } 1534 }
1440 break; 1535 break;
1536 case ALGORITHM_PARITY_0:
1537 i -= 2;
1538 break;
1539 case ALGORITHM_PARITY_N:
1540 break;
1541 case ALGORITHM_ROTATING_N_CONTINUE:
1542 if (sh->pd_idx == 0)
1543 i--; /* P D D D Q */
1544 else if (i > sh->pd_idx)
1545 i -= 2; /* D D Q P D */
1546 break;
1547 case ALGORITHM_LEFT_ASYMMETRIC_6:
1548 case ALGORITHM_RIGHT_ASYMMETRIC_6:
1549 if (i > sh->pd_idx)
1550 i--;
1551 break;
1552 case ALGORITHM_LEFT_SYMMETRIC_6:
1553 case ALGORITHM_RIGHT_SYMMETRIC_6:
1554 if (i < sh->pd_idx)
1555 i += data_disks + 1;
1556 i -= (sh->pd_idx + 1);
1557 break;
1558 case ALGORITHM_PARITY_0_6:
1559 i -= 1;
1560 break;
1441 default: 1561 default:
1442 printk(KERN_CRIT "raid6: unsupported algorithm %d\n", 1562 printk(KERN_CRIT "raid6: unsupported algorithm %d\n",
1443 conf->algorithm); 1563 conf->algorithm);
1564 BUG();
1444 } 1565 }
1445 break; 1566 break;
1446 } 1567 }
@@ -3308,7 +3429,7 @@ static int chunk_aligned_read(struct request_queue *q, struct bio * raid_bio)
3308 return 0; 3429 return 0;
3309 } 3430 }
3310 /* 3431 /*
3311 * use bio_clone to make a copy of the bio 3432 * use bio_clone to make a copy of the bio
3312 */ 3433 */
3313 align_bi = bio_clone(raid_bio, GFP_NOIO); 3434 align_bi = bio_clone(raid_bio, GFP_NOIO);
3314 if (!align_bi) 3435 if (!align_bi)
@@ -3439,7 +3560,7 @@ static int make_request(struct request_queue *q, struct bio * bi)
3439 if (rw == READ && 3560 if (rw == READ &&
3440 mddev->reshape_position == MaxSector && 3561 mddev->reshape_position == MaxSector &&
3441 chunk_aligned_read(q,bi)) 3562 chunk_aligned_read(q,bi))
3442 return 0; 3563 return 0;
3443 3564
3444 logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1); 3565 logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
3445 last_sector = bi->bi_sector + (bi->bi_size>>9); 3566 last_sector = bi->bi_sector + (bi->bi_size>>9);
@@ -4034,6 +4155,12 @@ static int run(mddev_t *mddev)
4034 mdname(mddev), mddev->level); 4155 mdname(mddev), mddev->level);
4035 return -EIO; 4156 return -EIO;
4036 } 4157 }
4158 if ((mddev->level == 5 && !algorithm_valid_raid5(mddev->layout)) ||
4159 (mddev->level == 6 && !algorithm_valid_raid6(mddev->layout))) {
4160 printk(KERN_ERR "raid5: %s: layout %d not supported\n",
4161 mdname(mddev), mddev->layout);
4162 return -EIO;
4163 }
4037 4164
4038 if (mddev->chunk_size < PAGE_SIZE) { 4165 if (mddev->chunk_size < PAGE_SIZE) {
4039 printk(KERN_ERR "md/raid5: chunk_size must be at least " 4166 printk(KERN_ERR "md/raid5: chunk_size must be at least "
@@ -4185,12 +4312,6 @@ static int run(mddev_t *mddev)
4185 conf->chunk_size, mdname(mddev)); 4312 conf->chunk_size, mdname(mddev));
4186 goto abort; 4313 goto abort;
4187 } 4314 }
4188 if (conf->algorithm > ALGORITHM_RIGHT_SYMMETRIC) {
4189 printk(KERN_ERR
4190 "raid5: unsupported parity algorithm %d for %s\n",
4191 conf->algorithm, mdname(mddev));
4192 goto abort;
4193 }
4194 if (mddev->degraded > conf->max_degraded) { 4315 if (mddev->degraded > conf->max_degraded) {
4195 printk(KERN_ERR "raid5: not enough operational devices for %s" 4316 printk(KERN_ERR "raid5: not enough operational devices for %s"
4196 " (%d/%d failed)\n", 4317 " (%d/%d failed)\n",
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 0c7375ad12bd..633d79289616 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -394,9 +394,62 @@ typedef struct raid5_private_data raid5_conf_t;
394/* 394/*
395 * Our supported algorithms 395 * Our supported algorithms
396 */ 396 */
397#define ALGORITHM_LEFT_ASYMMETRIC 0 397#define ALGORITHM_LEFT_ASYMMETRIC 0 /* Rotating Parity N with Data Restart */
398#define ALGORITHM_RIGHT_ASYMMETRIC 1 398#define ALGORITHM_RIGHT_ASYMMETRIC 1 /* Rotating Parity 0 with Data Restart */
399#define ALGORITHM_LEFT_SYMMETRIC 2 399#define ALGORITHM_LEFT_SYMMETRIC 2 /* Rotating Parity N with Data Continuation */
400#define ALGORITHM_RIGHT_SYMMETRIC 3 400#define ALGORITHM_RIGHT_SYMMETRIC 3 /* Rotating Parity 0 with Data Continuation */
401 401
402/* Define non-rotating (raid4) algorithms. These allow
403 * conversion of raid4 to raid5.
404 */
405#define ALGORITHM_PARITY_0 4 /* P or P,Q are initial devices */
406#define ALGORITHM_PARITY_N 5 /* P or P,Q are final devices. */
407
408/* DDF RAID6 layouts differ from md/raid6 layouts in two ways.
409 * Firstly, the exact positioning of the parity block is slightly
410 * different between the 'LEFT_*' modes of md and the "_N_*" modes
411 * of DDF.
412 * Secondly, the order of datablocks over which the Q syndrome is computed
413 * is different.
414 * Consequently we have different layouts for DDF/raid6 than md/raid6.
415 * These layouts are from the DDFv1.2 spec.
416 * Interestingly DDFv1.2-Errata-A does not specify N_CONTINUE but
417 * leaves RLQ=3 as 'Vendor Specific'
418 */
419
420#define ALGORITHM_ROTATING_ZERO_RESTART 8 /* DDF PRL=6 RLQ=1 */
421#define ALGORITHM_ROTATING_N_RESTART 9 /* DDF PRL=6 RLQ=2 */
422#define ALGORITHM_ROTATING_N_CONTINUE 10 /*DDF PRL=6 RLQ=3 */
423
424
425/* For every RAID5 algorithm we define a RAID6 algorithm
426 * with exactly the same layout for data and parity, and
427 * with the Q block always on the last device (N-1).
428 * This allows trivial conversion from RAID5 to RAID6
429 */
430#define ALGORITHM_LEFT_ASYMMETRIC_6 16
431#define ALGORITHM_RIGHT_ASYMMETRIC_6 17
432#define ALGORITHM_LEFT_SYMMETRIC_6 18
433#define ALGORITHM_RIGHT_SYMMETRIC_6 19
434#define ALGORITHM_PARITY_0_6 20
435#define ALGORITHM_PARITY_N_6 ALGORITHM_PARITY_N
436
/*
 * algorithm_valid_raid5 - check whether a layout number is usable for RAID5.
 * @layout: layout (parity algorithm) number to check.
 *
 * The accepted values are 0..5: the four rotating-parity layouts
 * (ALGORITHM_LEFT_ASYMMETRIC .. ALGORITHM_RIGHT_SYMMETRIC) together with
 * the two non-rotating ones (ALGORITHM_PARITY_0 and ALGORITHM_PARITY_N).
 *
 * Returns 1 if @layout is accepted, 0 otherwise.
 */
static inline int algorithm_valid_raid5(int layout)
{
	if (layout < 0)
		return 0;	/* negative layout numbers are never valid */

	return layout <= 5;
}
/*
 * algorithm_valid_raid6 - check whether a layout number is usable for RAID6.
 * @layout: layout (parity algorithm) number to check.
 *
 * Three disjoint ranges are accepted:
 *   0..5   - the layouts shared with RAID5 (rotating layouts plus
 *            ALGORITHM_PARITY_0 / ALGORITHM_PARITY_N),
 *   8..10  - the DDF layouts ALGORITHM_ROTATING_ZERO_RESTART,
 *            ALGORITHM_ROTATING_N_RESTART and ALGORITHM_ROTATING_N_CONTINUE,
 *   16..20 - the "RAID5 layout plus Q on the last device" layouts
 *            (ALGORITHM_LEFT_ASYMMETRIC_6 .. ALGORITHM_PARITY_0_6).
 *
 * Layout 9 (ALGORITHM_ROTATING_N_RESTART) must be accepted here: it is
 * handled by the sector-mapping code and reported as DDF by
 * algorithm_is_DDF(), so rejecting it would make that layout unusable.
 *
 * Returns 1 if @layout is accepted, 0 otherwise.
 */
static inline int algorithm_valid_raid6(int layout)
{
	return (layout >= 0 && layout <= 5)
		||
		(layout >= 8 && layout <= 10)
		||
		(layout >= 16 && layout <= 20);
}
450
/*
 * algorithm_is_DDF - tell whether a layout number is one of the DDF layouts.
 * @layout: layout (parity algorithm) number to check.
 *
 * The DDF layouts occupy the values 8..10
 * (ALGORITHM_ROTATING_ZERO_RESTART, ALGORITHM_ROTATING_N_RESTART,
 * ALGORITHM_ROTATING_N_CONTINUE).
 *
 * Returns 1 for a DDF layout, 0 for anything else.
 */
static inline int algorithm_is_DDF(int layout)
{
	switch (layout) {
	case 8:
	case 9:
	case 10:
		return 1;
	default:
		return 0;
	}
}
402#endif 455#endif