author		Jonathan Brassow <jbrassow@redhat.com>	2013-04-23 21:42:43 -0400
committer	NeilBrown <neilb@suse.de>		2013-04-23 21:42:43 -0400
commit		be83651f0050ca8621d58d35dad558e9c45cb18f (patch)
tree		de79ac9ee9ccc36816a346987024865e72760528
parent		a91d5ac04841ca1be340e8610e6d899fc8b419b5 (diff)
DM RAID: Add message/status support for changing sync action
This patch adds a message interface to dm-raid to allow the user to more
finely control the sync actions being performed by the MD driver.  This
gives the user the ability to initiate "check" and "repair" (i.e.
scrubbing).  Two additional fields have been appended to the status
output to provide more information about the type of sync action
occurring and the results of those actions, specifically: <sync_action>
and <mismatch_cnt>.  These new fields will always be populated.  This is
essentially the device-mapper way of doing what MD controls through the
'sync_action' sysfs file and shows through the 'mismatch_cnt' sysfs file.

Signed-off-by: Jonathan Brassow <jbrassow@redhat.com>
Signed-off-by: NeilBrown <neilb@suse.de>
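As a quick illustration (not part of the patch), the new sync actions are
driven through the standard 'dmsetup message' call and read back with
'dmsetup status'; the device name "my_raid" below is a placeholder:

	# Initiate a scrub of the array ("repair" would also correct any
	# discrepancies that are found):
	dmsetup message my_raid 0 check

	# <sync_action> and <mismatch_cnt> are appended to the status line:
	dmsetup status my_raid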
-rw-r--r--	Documentation/device-mapper/dm-raid.txt	 84
-rw-r--r--	drivers/md/dm-raid.c			111
2 files changed, 176 insertions(+), 19 deletions(-)
diff --git a/Documentation/device-mapper/dm-raid.txt b/Documentation/device-mapper/dm-raid.txt
index b428556197c9..e9192283e5a5 100644
--- a/Documentation/device-mapper/dm-raid.txt
+++ b/Documentation/device-mapper/dm-raid.txt
@@ -1,10 +1,13 @@
 dm-raid
--------
+=======
 
 The device-mapper RAID (dm-raid) target provides a bridge from DM to MD.
 It allows the MD RAID drivers to be accessed using a device-mapper
 interface.
 
+
+Mapping Table Interface
+-----------------------
 The target is named "raid" and it accepts the following parameters:
 
   <raid_type> <#raid_params> <raid_params> \
@@ -47,7 +50,7 @@ The target is named "raid" and it accepts the following parameters:
 	followed by optional parameters (in any order):
 	[sync|nosync]	Force or prevent RAID initialization.
 
-	[rebuild <idx>]	Rebuild drive number idx (first drive is 0).
+	[rebuild <idx>]	Rebuild drive number 'idx' (first drive is 0).
 
 	[daemon_sleep <ms>]
 		Interval between runs of the bitmap daemon that
@@ -56,9 +59,9 @@ The target is named "raid" and it accepts the following parameters:
 
 	[min_recovery_rate <kB/sec/disk>]	Throttle RAID initialization
 	[max_recovery_rate <kB/sec/disk>]	Throttle RAID initialization
-	[write_mostly <idx>]		Drive index is write-mostly
-	[max_write_behind <sectors>]	See '-write-behind=' (man mdadm)
-	[stripe_cache <sectors>]	Stripe cache size (higher RAIDs only)
+	[write_mostly <idx>]		Mark drive index 'idx' write-mostly.
+	[max_write_behind <sectors>]	See '--write-behind=' (man mdadm)
+	[stripe_cache <sectors>]	Stripe cache size (RAID 4/5/6 only)
 	[region_size <sectors>]
 		The region_size multiplied by the number of regions is the
 		logical size of the array.  The bitmap records the device
@@ -122,7 +125,7 @@ The target is named "raid" and it accepts the following parameters:
 	given for both the metadata and data drives for a given position.
 
 
-Example tables
+Example Tables
 --------------
 # RAID4 - 4 data drives, 1 parity (no metadata devices)
 # No metadata devices specified to hold superblock/bitmap info
@@ -141,26 +144,70 @@ Example tables
 raid4 4 2048 sync min_recovery_rate 20 \
     5 8:17 8:18 8:33 8:34 8:49 8:50 8:65 8:66 8:81 8:82
 
+
+Status Output
+-------------
 'dmsetup table' displays the table used to construct the mapping.
 The optional parameters are always printed in the order listed
 above with "sync" or "nosync" always output ahead of the other
 arguments, regardless of the order used when originally loading the table.
 Arguments that can be repeated are ordered by value.
 
-'dmsetup status' yields information on the state and health of the
-array.
-The output is as follows:
+
+'dmsetup status' yields information on the state and health of the array.
+The output is as follows (normally a single line, but expanded here for
+clarity):
 1: <s> <l> raid \
-2: <raid_type> <#devices> <1 health char for each dev> <resync_ratio>
+2: <raid_type> <#devices> <health_chars> \
+3: <sync_ratio> <sync_action> <mismatch_cnt>
 
 Line 1 is the standard output produced by device-mapper.
-Line 2 is produced by the raid target, and best explained by example:
-        0 1960893648 raid raid4 5 AAAAA 2/490221568
+Line 2 & 3 are produced by the raid target and are best explained by example:
+        0 1960893648 raid raid4 5 AAAAA 2/490221568 init 0
 Here we can see the RAID type is raid4, there are 5 devices - all of
-which are 'A'live, and the array is 2/490221568 complete with recovery.
-Faulty or missing devices are marked 'D'.  Devices that are out-of-sync
-are marked 'a'.
-
+which are 'A'live, and the array is 2/490221568 complete with its initial
+recovery.  Here is a fuller description of the individual fields:
+	<raid_type>     Same as the <raid_type> used to create the array.
+	<health_chars>  One char for each device, indicating: 'A' = alive and
+			in-sync, 'a' = alive but not in-sync, 'D' = dead/failed.
+	<sync_ratio>    The ratio indicating how much of the array has undergone
+			the process described by 'sync_action'.  If the
+			'sync_action' is "check" or "repair", then the process
+			of "resync" or "recover" can be considered complete.
+	<sync_action>   One of the following possible states:
+			idle    - No synchronization action is being performed.
+			frozen  - The current action has been halted.
+			resync  - Array is undergoing its initial synchronization
+				  or is resynchronizing after an unclean shutdown
+				  (possibly aided by a bitmap).
+			recover - A device in the array is being rebuilt or
+				  replaced.
+			check   - A user-initiated full check of the array is
+				  being performed.  All blocks are read and
+				  checked for consistency.  The number of
+				  discrepancies found are recorded in
+				  <mismatch_cnt>.  No changes are made to the
+				  array by this action.
+			repair  - The same as "check", but discrepancies are
+				  corrected.
+			reshape - The array is undergoing a reshape.
+	<mismatch_cnt>  The number of discrepancies found between mirror copies
+			in RAID1/10 or wrong parity values found in RAID4/5/6.
+			This value is valid only after a "check" of the array
+			is performed.  A healthy array has a 'mismatch_cnt' of 0.
+
+Message Interface
+-----------------
+The dm-raid target will accept certain actions through the 'message' interface.
+('man dmsetup' for more information on the message interface.)  These actions
+include:
+	"idle"   - Halt the current sync action.
+	"frozen" - Freeze the current sync action.
+	"resync" - Initiate/continue a resync.
+	"recover"- Initiate/continue a recover process.
+	"check"  - Initiate a check (i.e. a "scrub") of the array.
+	"repair" - Initiate a repair of the array.
+	"reshape"- Currently unsupported (-EINVAL).
 
 Version History
 ---------------
@@ -171,4 +218,7 @@ Version History
 1.3.1	Allow device replacement/rebuild for RAID 10
 1.3.2	Fix/improve redundancy checking for RAID10
 1.4.0	Non-functional change.  Removes arg from mapping function.
-1.4.1	Add RAID10 "far" and "offset" algorithm support.
+1.4.1	RAID10 fix redundancy validation checks (commit 55ebbb5).
+1.4.2	Add RAID10 "far" and "offset" algorithm support.
+1.5.0	Add message interface to allow manipulation of the sync_action.
+	New status (STATUSTYPE_INFO) fields: sync_action and mismatch_cnt.
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 311e3d35b272..1d3fe1a40a9b 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -1279,6 +1279,31 @@ static int raid_map(struct dm_target *ti, struct bio *bio)
 	return DM_MAPIO_SUBMITTED;
 }
 
+static const char *decipher_sync_action(struct mddev *mddev)
+{
+	if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
+		return "frozen";
+
+	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
+	    (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))) {
+		if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
+			return "reshape";
+
+		if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
+			if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
+				return "resync";
+			else if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
+				return "check";
+			return "repair";
+		}
+
+		if (test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
+			return "recover";
+	}
+
+	return "idle";
+}
+
 static void raid_status(struct dm_target *ti, status_type_t type,
 			unsigned status_flags, char *result, unsigned maxlen)
 {
@@ -1298,8 +1323,18 @@ static void raid_status(struct dm_target *ti, status_type_t type,
 			sync = rs->md.recovery_cp;
 
 		if (sync >= rs->md.resync_max_sectors) {
+			/*
+			 * Sync complete.
+			 */
 			array_in_sync = 1;
 			sync = rs->md.resync_max_sectors;
+		} else if (test_bit(MD_RECOVERY_REQUESTED, &rs->md.recovery)) {
+			/*
+			 * If "check" or "repair" is occurring, the array has
+			 * undergone an initial sync and the health characters
+			 * should not be 'a' anymore.
+			 */
+			array_in_sync = 1;
 		} else {
 			/*
 			 * The array may be doing an initial sync, or it may
@@ -1311,6 +1346,7 @@ static void raid_status(struct dm_target *ti, status_type_t type,
 				if (!test_bit(In_sync, &rs->dev[i].rdev.flags))
 					array_in_sync = 1;
 		}
+
 		/*
 		 * Status characters:
 		 *  'D' = Dead/Failed device
@@ -1339,6 +1375,21 @@ static void raid_status(struct dm_target *ti, status_type_t type,
 		       (unsigned long long) sync,
 		       (unsigned long long) rs->md.resync_max_sectors);
 
+		/*
+		 * Sync action:
+		 *   See Documentation/device-mapper/dm-raid.txt for
+		 *   information on each of these states.
+		 */
+		DMEMIT(" %s", decipher_sync_action(&rs->md));
+
+		/*
+		 * resync_mismatches/mismatch_cnt
+		 *   This field shows the number of discrepancies found when
+		 *   performing a "check" of the array.
+		 */
+		DMEMIT(" %llu",
+		       (unsigned long long)
+		       atomic64_read(&rs->md.resync_mismatches));
 		break;
 	case STATUSTYPE_TABLE:
 		/* The string you would use to construct this array */
@@ -1425,7 +1476,62 @@ static void raid_status(struct dm_target *ti, status_type_t type,
 	}
 }
 
-static int raid_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data)
+static int raid_message(struct dm_target *ti, unsigned argc, char **argv)
+{
+	struct raid_set *rs = ti->private;
+	struct mddev *mddev = &rs->md;
+
+	if (!strcasecmp(argv[0], "reshape")) {
+		DMERR("Reshape not supported.");
+		return -EINVAL;
+	}
+
+	if (!mddev->pers || !mddev->pers->sync_request)
+		return -EINVAL;
+
+	if (!strcasecmp(argv[0], "frozen"))
+		set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+	else
+		clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+
+	if (!strcasecmp(argv[0], "idle") || !strcasecmp(argv[0], "frozen")) {
+		if (mddev->sync_thread) {
+			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
+			md_reap_sync_thread(mddev);
+		}
+	} else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
+		   test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
+		return -EBUSY;
+	else if (!strcasecmp(argv[0], "resync"))
+		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+	else if (!strcasecmp(argv[0], "recover")) {
+		set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
+		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+	} else {
+		if (!strcasecmp(argv[0], "check"))
+			set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
+		else if (!!strcasecmp(argv[0], "repair"))
+			return -EINVAL;
+		set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
+		set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
+	}
+	if (mddev->ro == 2) {
+		/* A write to sync_action is enough to justify
+		 * canceling read-auto mode
+		 */
+		mddev->ro = 0;
+		if (!mddev->suspended)
+			md_wakeup_thread(mddev->sync_thread);
+	}
+	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+	if (!mddev->suspended)
+		md_wakeup_thread(mddev->thread);
+
+	return 0;
+}
+
+static int raid_iterate_devices(struct dm_target *ti,
+				iterate_devices_callout_fn fn, void *data)
 {
 	struct raid_set *rs = ti->private;
 	unsigned i;
@@ -1482,12 +1588,13 @@ static void raid_resume(struct dm_target *ti)
 
 static struct target_type raid_target = {
 	.name = "raid",
-	.version = {1, 4, 2},
+	.version = {1, 5, 0},
 	.module = THIS_MODULE,
 	.ctr = raid_ctr,
 	.dtr = raid_dtr,
 	.map = raid_map,
 	.status = raid_status,
+	.message = raid_message,
 	.iterate_devices = raid_iterate_devices,
 	.io_hints = raid_io_hints,
 	.presuspend = raid_presuspend,