author	Heinz Mauelshagen <heinzm@redhat.com>	2015-04-29 08:03:04 -0400
committer	Mike Snitzer <snitzer@redhat.com>	2015-05-29 14:19:00 -0400
commit	0cf4503174c12025ac7ea61048cb7c1d4d1ed85c (patch)
tree	52b88a85b6bd98f5156ecd2c1e573e04fcb7c59a /drivers/md/dm-raid.c
parent	c76d53f43ec4f9b9f200f031d303f21bdf6927d0 (diff)
dm raid: add support for the MD RAID0 personality
Add dm-raid access to the MD RAID0 personality to enable single zone striping.

The following changes enable that access:
- add a type definition to the raid_types array
- make bitmap creation conditional in super_validate(), because bitmaps
  are not allowed in raid0
- set rdev->sectors to the data image size in super_validate() to allow
  the raid0 personality to calculate the MD array size properly
- use the mddev_(un)lock() functions instead of direct mutex_(un)lock()
  (wrapped in here because it's a trivial change)
- enhance raid_status() to always report full sync for raid0 so that
  userspace checks for 100% sync will succeed and allow for resize
  (and takeover/reshape once added in future patches)
- enhance raid_resume() to not load the bitmap in the raid0 case
- add a merge function to avoid data corruption (seen with readahead)
  that resulted from bio payloads that grew too large. This problem did
  not occur with the other raid levels because it either did not apply
  without striping (raid1) or was avoided via stripe caching.
- raise the target version to 1.7.0 because of the raid0 API change

Signed-off-by: Heinz Mauelshagen <heinzm@redhat.com>
Reviewed-by: Jonathan Brassow <jbrassow@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
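For context, a minimal sketch of how userspace could activate a raid0 set with the new type, following the table format documented in Documentation/device-mapper/dm-raid.txt. The device names, the 2097152-sector (1 GiB) length and the 128-sector (64 KiB) chunk size are purely illustrative, and "-" stands for "no metadata device":

    # hypothetical 2-leg raid0 set: 1 raid param (chunk size 128 sectors),
    # 2 raid devices given as <metadata_dev> <data_dev> pairs
    dmsetup create r0 --table \
        "0 2097152 raid raid0 1 128 2 - /dev/sdb1 - /dev/sdc1"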
Diffstat (limited to 'drivers/md/dm-raid.c')
-rw-r--r--	drivers/md/dm-raid.c	132
1 file changed, 84 insertions, 48 deletions
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index af49ddebaa62..2daa67793511 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) 2010-2011 Neil Brown
- * Copyright (C) 2010-2014 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2010-2015 Red Hat, Inc. All rights reserved.
  *
  * This file is released under the GPL.
  */
@@ -82,6 +82,7 @@ static struct raid_type {
 	const unsigned level;     /* RAID level. */
 	const unsigned algorithm; /* RAID algorithm. */
 } raid_types[] = {
+	{"raid0", "RAID0 (striping)", 0, 2, 0, 0 /* NONE */},
 	{"raid1", "RAID1 (mirroring)", 0, 2, 1, 0 /* NONE */},
 	{"raid10", "RAID10 (striped mirrors)", 0, 2, 10, UINT_MAX /* Varies */},
 	{"raid4", "RAID4 (dedicated parity disk)", 1, 2, 5, ALGORITHM_PARITY_0},
@@ -719,7 +720,7 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
 		rs->md.layout = raid10_format_to_md_layout(raid10_format,
 							   raid10_copies);
 		rs->md.new_layout = rs->md.layout;
-	} else if ((rs->raid_type->level > 1) &&
+	} else if ((!rs->raid_type->level || rs->raid_type->level > 1) &&
 		   sector_div(sectors_per_dev,
 			      (rs->md.raid_disks - rs->raid_type->parity_devs))) {
 		rs->ti->error = "Target length not divisible by number of data devices";
@@ -1025,8 +1026,9 @@ static int super_init_validation(struct mddev *mddev, struct md_rdev *rdev)
 	return 0;
 }
 
-static int super_validate(struct mddev *mddev, struct md_rdev *rdev)
+static int super_validate(struct raid_set *rs, struct md_rdev *rdev)
 {
+	struct mddev *mddev = &rs->md;
 	struct dm_raid_superblock *sb = page_address(rdev->sb_page);
 
 	/*
@@ -1036,8 +1038,10 @@ static int super_validate(struct mddev *mddev, struct md_rdev *rdev)
 	if (!mddev->events && super_init_validation(mddev, rdev))
 		return -EINVAL;
 
-	mddev->bitmap_info.offset = 4096 >> 9; /* Enable bitmap creation */
-	rdev->mddev->bitmap_info.default_offset = 4096 >> 9;
+	/* Enable bitmap creation for RAID levels != 0 */
+	mddev->bitmap_info.offset = (rs->raid_type->level) ? to_sector(4096) : 0;
+	rdev->mddev->bitmap_info.default_offset = mddev->bitmap_info.offset;
+
 	if (!test_bit(FirstUse, &rdev->flags)) {
 		rdev->recovery_offset = le64_to_cpu(sb->disk_recovery_offset);
 		if (rdev->recovery_offset != MaxSector)
@@ -1081,6 +1085,8 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
 		 * that the "sync" directive is disallowed during the
 		 * reshape.
 		 */
+		rdev->sectors = to_sector(i_size_read(rdev->bdev->bd_inode));
+
 		if (rs->ctr_flags & CTR_FLAG_SYNC)
 			continue;
 
@@ -1139,11 +1145,11 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
 	 * validation for the remaining devices.
 	 */
 	ti->error = "Unable to assemble array: Invalid superblocks";
-	if (super_validate(mddev, freshest))
+	if (super_validate(rs, freshest))
 		return -EINVAL;
 
 	rdev_for_each(rdev, mddev)
-		if ((rdev != freshest) && super_validate(mddev, rdev))
+		if ((rdev != freshest) && super_validate(rs, rdev))
 			return -EINVAL;
 
 	return 0;
@@ -1281,10 +1287,11 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	 */
 	configure_discard_support(ti, rs);
 
-	mutex_lock(&rs->md.reconfig_mutex);
+	/* Has to be held on running the array */
+	mddev_lock_nointr(&rs->md);
 	ret = md_run(&rs->md);
 	rs->md.in_sync = 0; /* Assume already marked dirty */
-	mutex_unlock(&rs->md.reconfig_mutex);
+	mddev_unlock(&rs->md);
 
 	if (ret) {
 		ti->error = "Fail to run raid array";
@@ -1367,34 +1374,40 @@ static void raid_status(struct dm_target *ti, status_type_t type,
 	case STATUSTYPE_INFO:
 		DMEMIT("%s %d ", rs->raid_type->name, rs->md.raid_disks);
 
-		if (test_bit(MD_RECOVERY_RUNNING, &rs->md.recovery))
-			sync = rs->md.curr_resync_completed;
-		else
-			sync = rs->md.recovery_cp;
-
-		if (sync >= rs->md.resync_max_sectors) {
-			/*
-			 * Sync complete.
-			 */
+		if (rs->raid_type->level) {
+			if (test_bit(MD_RECOVERY_RUNNING, &rs->md.recovery))
+				sync = rs->md.curr_resync_completed;
+			else
+				sync = rs->md.recovery_cp;
+
+			if (sync >= rs->md.resync_max_sectors) {
+				/*
+				 * Sync complete.
+				 */
+				array_in_sync = 1;
+				sync = rs->md.resync_max_sectors;
+			} else if (test_bit(MD_RECOVERY_REQUESTED, &rs->md.recovery)) {
+				/*
+				 * If "check" or "repair" is occurring, the array has
+				 * undergone and initial sync and the health characters
+				 * should not be 'a' anymore.
+				 */
+				array_in_sync = 1;
+			} else {
+				/*
+				 * The array may be doing an initial sync, or it may
+				 * be rebuilding individual components. If all the
+				 * devices are In_sync, then it is the array that is
+				 * being initialized.
+				 */
+				for (i = 0; i < rs->md.raid_disks; i++)
+					if (!test_bit(In_sync, &rs->dev[i].rdev.flags))
+						array_in_sync = 1;
+			}
+		} else {
+			/* RAID0 */
 			array_in_sync = 1;
 			sync = rs->md.resync_max_sectors;
-		} else if (test_bit(MD_RECOVERY_REQUESTED, &rs->md.recovery)) {
-			/*
-			 * If "check" or "repair" is occurring, the array has
-			 * undergone and initial sync and the health characters
-			 * should not be 'a' anymore.
-			 */
-			array_in_sync = 1;
-		} else {
-			/*
-			 * The array may be doing an initial sync, or it may
-			 * be rebuilding individual components. If all the
-			 * devices are In_sync, then it is the array that is
-			 * being initialized.
-			 */
-			for (i = 0; i < rs->md.raid_disks; i++)
-				if (!test_bit(In_sync, &rs->dev[i].rdev.flags))
-					array_in_sync = 1;
 		}
 
 		/*
@@ -1683,26 +1696,48 @@ static void raid_resume(struct dm_target *ti)
 {
 	struct raid_set *rs = ti->private;
 
-	set_bit(MD_CHANGE_DEVS, &rs->md.flags);
-	if (!rs->bitmap_loaded) {
-		bitmap_load(&rs->md);
-		rs->bitmap_loaded = 1;
-	} else {
-		/*
-		 * A secondary resume while the device is active.
-		 * Take this opportunity to check whether any failed
-		 * devices are reachable again.
-		 */
-		attempt_restore_of_faulty_devices(rs);
+	if (rs->raid_type->level) {
+		set_bit(MD_CHANGE_DEVS, &rs->md.flags);
+
+		if (!rs->bitmap_loaded) {
+			bitmap_load(&rs->md);
+			rs->bitmap_loaded = 1;
+		} else {
+			/*
+			 * A secondary resume while the device is active.
+			 * Take this opportunity to check whether any failed
+			 * devices are reachable again.
+			 */
+			attempt_restore_of_faulty_devices(rs);
+		}
+
+		clear_bit(MD_RECOVERY_FROZEN, &rs->md.recovery);
 	}
 
-	clear_bit(MD_RECOVERY_FROZEN, &rs->md.recovery);
 	mddev_resume(&rs->md);
 }
 
+static int raid_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
+		      struct bio_vec *biovec, int max_size)
+{
+	struct raid_set *rs = ti->private;
+	struct md_personality *pers = rs->md.pers;
+
+	if (pers && pers->mergeable_bvec)
+		return min(max_size, pers->mergeable_bvec(&rs->md, bvm, biovec));
+
+	/*
+	 * In case we can't request the personality because
+	 * the raid set is not running yet
+	 *
+	 * -> return safe minimum
+	 */
+	return rs->md.chunk_sectors;
+}
+
 static struct target_type raid_target = {
 	.name = "raid",
-	.version = {1, 6, 0},
+	.version = {1, 7, 0},
 	.module = THIS_MODULE,
 	.ctr = raid_ctr,
 	.dtr = raid_dtr,
@@ -1714,6 +1749,7 @@ static struct target_type raid_target = {
 	.presuspend = raid_presuspend,
 	.postsuspend = raid_postsuspend,
 	.resume = raid_resume,
+	.merge = raid_merge,
 };
 
 static int __init dm_raid_init(void)