diff options
-rw-r--r-- | Documentation/device-mapper/dm-raid.txt | 84 | ||||
-rw-r--r-- | Documentation/md.txt | 16 | ||||
-rw-r--r-- | drivers/md/bitmap.c | 4 | ||||
-rw-r--r-- | drivers/md/dm-raid.c | 111 | ||||
-rw-r--r-- | drivers/md/md.c | 235 | ||||
-rw-r--r-- | drivers/md/md.h | 1 | ||||
-rw-r--r-- | drivers/md/raid1.c | 8 | ||||
-rw-r--r-- | drivers/md/raid10.c | 24 | ||||
-rw-r--r-- | drivers/md/raid5.c | 16 |
9 files changed, 368 insertions, 131 deletions
diff --git a/Documentation/device-mapper/dm-raid.txt b/Documentation/device-mapper/dm-raid.txt index b428556197c9..e9192283e5a5 100644 --- a/Documentation/device-mapper/dm-raid.txt +++ b/Documentation/device-mapper/dm-raid.txt | |||
@@ -1,10 +1,13 @@ | |||
1 | dm-raid | 1 | dm-raid |
2 | ------- | 2 | ======= |
3 | 3 | ||
4 | The device-mapper RAID (dm-raid) target provides a bridge from DM to MD. | 4 | The device-mapper RAID (dm-raid) target provides a bridge from DM to MD. |
5 | It allows the MD RAID drivers to be accessed using a device-mapper | 5 | It allows the MD RAID drivers to be accessed using a device-mapper |
6 | interface. | 6 | interface. |
7 | 7 | ||
8 | |||
9 | Mapping Table Interface | ||
10 | ----------------------- | ||
8 | The target is named "raid" and it accepts the following parameters: | 11 | The target is named "raid" and it accepts the following parameters: |
9 | 12 | ||
10 | <raid_type> <#raid_params> <raid_params> \ | 13 | <raid_type> <#raid_params> <raid_params> \ |
@@ -47,7 +50,7 @@ The target is named "raid" and it accepts the following parameters: | |||
47 | followed by optional parameters (in any order): | 50 | followed by optional parameters (in any order): |
48 | [sync|nosync] Force or prevent RAID initialization. | 51 | [sync|nosync] Force or prevent RAID initialization. |
49 | 52 | ||
50 | [rebuild <idx>] Rebuild drive number idx (first drive is 0). | 53 | [rebuild <idx>] Rebuild drive number 'idx' (first drive is 0). |
51 | 54 | ||
52 | [daemon_sleep <ms>] | 55 | [daemon_sleep <ms>] |
53 | Interval between runs of the bitmap daemon that | 56 | Interval between runs of the bitmap daemon that |
@@ -56,9 +59,9 @@ The target is named "raid" and it accepts the following parameters: | |||
56 | 59 | ||
57 | [min_recovery_rate <kB/sec/disk>] Throttle RAID initialization | 60 | [min_recovery_rate <kB/sec/disk>] Throttle RAID initialization |
58 | [max_recovery_rate <kB/sec/disk>] Throttle RAID initialization | 61 | [max_recovery_rate <kB/sec/disk>] Throttle RAID initialization |
59 | [write_mostly <idx>] Drive index is write-mostly | 62 | [write_mostly <idx>] Mark drive index 'idx' write-mostly. |
60 | [max_write_behind <sectors>] See '-write-behind=' (man mdadm) | 63 | [max_write_behind <sectors>] See '--write-behind=' (man mdadm) |
61 | [stripe_cache <sectors>] Stripe cache size (higher RAIDs only) | 64 | [stripe_cache <sectors>] Stripe cache size (RAID 4/5/6 only) |
62 | [region_size <sectors>] | 65 | [region_size <sectors>] |
63 | The region_size multiplied by the number of regions is the | 66 | The region_size multiplied by the number of regions is the |
64 | logical size of the array. The bitmap records the device | 67 | logical size of the array. The bitmap records the device |
@@ -122,7 +125,7 @@ The target is named "raid" and it accepts the following parameters: | |||
122 | given for both the metadata and data drives for a given position. | 125 | given for both the metadata and data drives for a given position. |
123 | 126 | ||
124 | 127 | ||
125 | Example tables | 128 | Example Tables |
126 | -------------- | 129 | -------------- |
127 | # RAID4 - 4 data drives, 1 parity (no metadata devices) | 130 | # RAID4 - 4 data drives, 1 parity (no metadata devices) |
128 | # No metadata devices specified to hold superblock/bitmap info | 131 | # No metadata devices specified to hold superblock/bitmap info |
@@ -141,26 +144,70 @@ Example tables | |||
141 | raid4 4 2048 sync min_recovery_rate 20 \ | 144 | raid4 4 2048 sync min_recovery_rate 20 \ |
142 | 5 8:17 8:18 8:33 8:34 8:49 8:50 8:65 8:66 8:81 8:82 | 145 | 5 8:17 8:18 8:33 8:34 8:49 8:50 8:65 8:66 8:81 8:82 |
143 | 146 | ||
147 | |||
148 | Status Output | ||
149 | ------------- | ||
144 | 'dmsetup table' displays the table used to construct the mapping. | 150 | 'dmsetup table' displays the table used to construct the mapping. |
145 | The optional parameters are always printed in the order listed | 151 | The optional parameters are always printed in the order listed |
146 | above with "sync" or "nosync" always output ahead of the other | 152 | above with "sync" or "nosync" always output ahead of the other |
147 | arguments, regardless of the order used when originally loading the table. | 153 | arguments, regardless of the order used when originally loading the table. |
148 | Arguments that can be repeated are ordered by value. | 154 | Arguments that can be repeated are ordered by value. |
149 | 155 | ||
150 | 'dmsetup status' yields information on the state and health of the | 156 | |
151 | array. | 157 | 'dmsetup status' yields information on the state and health of the array. |
152 | The output is as follows: | 158 | The output is as follows (normally a single line, but expanded here for |
159 | clarity): | ||
153 | 1: <s> <l> raid \ | 160 | 1: <s> <l> raid \ |
154 | 2: <raid_type> <#devices> <1 health char for each dev> <resync_ratio> | 161 | 2: <raid_type> <#devices> <health_chars> \ |
162 | 3: <sync_ratio> <sync_action> <mismatch_cnt> | ||
155 | 163 | ||
156 | Line 1 is the standard output produced by device-mapper. | 164 | Line 1 is the standard output produced by device-mapper. |
157 | Line 2 is produced by the raid target, and best explained by example: | 165 | Lines 2 & 3 are produced by the raid target and are best explained by example: |
158 | 0 1960893648 raid raid4 5 AAAAA 2/490221568 | 166 | 0 1960893648 raid raid4 5 AAAAA 2/490221568 init 0 |
159 | Here we can see the RAID type is raid4, there are 5 devices - all of | 167 | Here we can see the RAID type is raid4, there are 5 devices - all of |
160 | which are 'A'live, and the array is 2/490221568 complete with recovery. | 168 | which are 'A'live, and the array is 2/490221568 complete with its initial |
161 | Faulty or missing devices are marked 'D'. Devices that are out-of-sync | 169 | recovery. Here is a fuller description of the individual fields: |
162 | are marked 'a'. | 170 | <raid_type> Same as the <raid_type> used to create the array. |
163 | 171 | <health_chars> One char for each device, indicating: 'A' = alive and | |
172 | in-sync, 'a' = alive but not in-sync, 'D' = dead/failed. | ||
173 | <sync_ratio> The ratio indicating how much of the array has undergone | ||
174 | the process described by 'sync_action'. If the | ||
175 | 'sync_action' is "check" or "repair", then the process | ||
176 | of "resync" or "recover" can be considered complete. | ||
177 | <sync_action> One of the following possible states: | ||
178 | idle - No synchronization action is being performed. | ||
179 | frozen - The current action has been halted. | ||
180 | resync - Array is undergoing its initial synchronization | ||
181 | or is resynchronizing after an unclean shutdown | ||
182 | (possibly aided by a bitmap). | ||
183 | recover - A device in the array is being rebuilt or | ||
184 | replaced. | ||
185 | check - A user-initiated full check of the array is | ||
186 | being performed. All blocks are read and | ||
187 | checked for consistency. The number of | ||
188 | discrepancies found is recorded in | ||
189 | <mismatch_cnt>. No changes are made to the | ||
190 | array by this action. | ||
191 | repair - The same as "check", but discrepancies are | ||
192 | corrected. | ||
193 | reshape - The array is undergoing a reshape. | ||
194 | <mismatch_cnt> The number of discrepancies found between mirror copies | ||
195 | in RAID1/10 or wrong parity values found in RAID4/5/6. | ||
196 | This value is valid only after a "check" of the array | ||
197 | is performed. A healthy array has a 'mismatch_cnt' of 0. | ||
198 | |||
199 | Message Interface | ||
200 | ----------------- | ||
201 | The dm-raid target will accept certain actions through the 'message' interface. | ||
202 | ('man dmsetup' for more information on the message interface.) These actions | ||
203 | include: | ||
204 | "idle" - Halt the current sync action. | ||
205 | "frozen" - Freeze the current sync action. | ||
206 | "resync" - Initiate/continue a resync. | ||
207 | "recover"- Initiate/continue a recover process. | ||
208 | "check" - Initiate a check (i.e. a "scrub") of the array. | ||
209 | "repair" - Initiate a repair of the array. | ||
210 | "reshape"- Currently unsupported (-EINVAL). | ||
164 | 211 | ||
165 | Version History | 212 | Version History |
166 | --------------- | 213 | --------------- |
@@ -171,4 +218,7 @@ Version History | |||
171 | 1.3.1 Allow device replacement/rebuild for RAID 10 | 218 | 1.3.1 Allow device replacement/rebuild for RAID 10 |
172 | 1.3.2 Fix/improve redundancy checking for RAID10 | 219 | 1.3.2 Fix/improve redundancy checking for RAID10 |
173 | 1.4.0 Non-functional change. Removes arg from mapping function. | 220 | 1.4.0 Non-functional change. Removes arg from mapping function. |
174 | 1.4.1 Add RAID10 "far" and "offset" algorithm support. | 221 | 1.4.1 RAID10 fix redundancy validation checks (commit 55ebbb5). |
222 | 1.4.2 Add RAID10 "far" and "offset" algorithm support. | ||
223 | 1.5.0 Add message interface to allow manipulation of the sync_action. | ||
224 | New status (STATUSTYPE_INFO) fields: sync_action and mismatch_cnt. | ||
diff --git a/Documentation/md.txt b/Documentation/md.txt index 993fba37b7d1..e0ddd327632d 100644 --- a/Documentation/md.txt +++ b/Documentation/md.txt | |||
@@ -119,7 +119,7 @@ device to add. | |||
119 | The array is started with the RUN_ARRAY ioctl. | 119 | The array is started with the RUN_ARRAY ioctl. |
120 | 120 | ||
121 | Once started, new devices can be added. They should have an | 121 | Once started, new devices can be added. They should have an |
122 | appropriate superblock written to them, and then passed be in with | 122 | appropriate superblock written to them, and then be passed in with |
123 | ADD_NEW_DISK. | 123 | ADD_NEW_DISK. |
124 | 124 | ||
125 | Devices that have failed or are not yet active can be detached from an | 125 | Devices that have failed or are not yet active can be detached from an |
@@ -131,7 +131,7 @@ Specific Rules that apply to format-0 super block arrays, and | |||
131 | ------------------------------------------------------------- | 131 | ------------------------------------------------------------- |
132 | 132 | ||
133 | An array can be 'created' by describing the array (level, chunksize | 133 | An array can be 'created' by describing the array (level, chunksize |
134 | etc) in a SET_ARRAY_INFO ioctl. This must has major_version==0 and | 134 | etc) in a SET_ARRAY_INFO ioctl. This must have major_version==0 and |
135 | raid_disks != 0. | 135 | raid_disks != 0. |
136 | 136 | ||
137 | Then uninitialized devices can be added with ADD_NEW_DISK. The | 137 | Then uninitialized devices can be added with ADD_NEW_DISK. The |
@@ -426,7 +426,7 @@ Each directory contains: | |||
426 | offset | 426 | offset |
427 | This gives the location in the device (in sectors from the | 427 | This gives the location in the device (in sectors from the |
428 | start) where data from the array will be stored. Any part of | 428 | start) where data from the array will be stored. Any part of |
429 | the device before this offset us not touched, unless it is | 429 | the device before this offset is not touched, unless it is |
430 | used for storing metadata (Formats 1.1 and 1.2). | 430 | used for storing metadata (Formats 1.1 and 1.2). |
431 | 431 | ||
432 | size | 432 | size |
@@ -440,7 +440,7 @@ Each directory contains: | |||
440 | When the device is not 'in_sync', this records the number of | 440 | When the device is not 'in_sync', this records the number of |
441 | sectors from the start of the device which are known to be | 441 | sectors from the start of the device which are known to be |
442 | correct. This is normally zero, but during a recovery | 442 | correct. This is normally zero, but during a recovery |
443 | operation is will steadily increase, and if the recovery is | 443 | operation it will steadily increase, and if the recovery is |
444 | interrupted, restoring this value can cause recovery to | 444 | interrupted, restoring this value can cause recovery to |
445 | avoid repeating the earlier blocks. With v1.x metadata, this | 445 | avoid repeating the earlier blocks. With v1.x metadata, this |
446 | value is saved and restored automatically. | 446 | value is saved and restored automatically. |
@@ -468,7 +468,7 @@ Each directory contains: | |||
468 | 468 | ||
469 | 469 | ||
470 | 470 | ||
471 | An active md device will also contain and entry for each active device | 471 | An active md device will also contain an entry for each active device |
472 | in the array. These are named | 472 | in the array. These are named |
473 | 473 | ||
474 | rdNN | 474 | rdNN |
@@ -482,7 +482,7 @@ will show 'in_sync' on every line. | |||
482 | 482 | ||
483 | 483 | ||
484 | 484 | ||
485 | Active md devices for levels that support data redundancy (1,4,5,6) | 485 | Active md devices for levels that support data redundancy (1,4,5,6,10) |
486 | also have | 486 | also have |
487 | 487 | ||
488 | sync_action | 488 | sync_action |
@@ -494,7 +494,7 @@ also have | |||
494 | failed/missing device | 494 | failed/missing device |
495 | idle - nothing is happening | 495 | idle - nothing is happening |
496 | check - A full check of redundancy was requested and is | 496 | check - A full check of redundancy was requested and is |
497 | happening. This reads all block and checks | 497 | happening. This reads all blocks and checks |
498 | them. A repair may also happen for some raid | 498 | them. A repair may also happen for some raid |
499 | levels. | 499 | levels. |
500 | repair - A full check and repair is happening. This is | 500 | repair - A full check and repair is happening. This is |
@@ -522,7 +522,7 @@ also have | |||
522 | 522 | ||
523 | degraded | 523 | degraded |
524 | This contains a count of the number of devices by which the | 524 | This contains a count of the number of devices by which the |
525 | arrays is degraded. So an optimal array with show '0'. A | 525 | array is degraded. So an optimal array will show '0'. A |
526 | single failed/missing drive will show '1', etc. | 526 | single failed/missing drive will show '1', etc. |
527 | This file responds to select/poll, any increase or decrease | 527 | This file responds to select/poll, any increase or decrease |
528 | in the count of missing devices will trigger an event. | 528 | in the count of missing devices will trigger an event. |
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 4fd9d6aeff6a..5a2c75499824 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c | |||
@@ -846,7 +846,7 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block) | |||
846 | if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags)) | 846 | if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags)) |
847 | set_bit(bit, kaddr); | 847 | set_bit(bit, kaddr); |
848 | else | 848 | else |
849 | test_and_set_bit_le(bit, kaddr); | 849 | set_bit_le(bit, kaddr); |
850 | kunmap_atomic(kaddr); | 850 | kunmap_atomic(kaddr); |
851 | pr_debug("set file bit %lu page %lu\n", bit, page->index); | 851 | pr_debug("set file bit %lu page %lu\n", bit, page->index); |
852 | /* record page number so it gets flushed to disk when unplug occurs */ | 852 | /* record page number so it gets flushed to disk when unplug occurs */ |
@@ -868,7 +868,7 @@ static void bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block) | |||
868 | if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags)) | 868 | if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags)) |
869 | clear_bit(bit, paddr); | 869 | clear_bit(bit, paddr); |
870 | else | 870 | else |
871 | test_and_clear_bit_le(bit, paddr); | 871 | clear_bit_le(bit, paddr); |
872 | kunmap_atomic(paddr); | 872 | kunmap_atomic(paddr); |
873 | if (!test_page_attr(bitmap, page->index, BITMAP_PAGE_NEEDWRITE)) { | 873 | if (!test_page_attr(bitmap, page->index, BITMAP_PAGE_NEEDWRITE)) { |
874 | set_page_attr(bitmap, page->index, BITMAP_PAGE_PENDING); | 874 | set_page_attr(bitmap, page->index, BITMAP_PAGE_PENDING); |
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 311e3d35b272..1d3fe1a40a9b 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c | |||
@@ -1279,6 +1279,31 @@ static int raid_map(struct dm_target *ti, struct bio *bio) | |||
1279 | return DM_MAPIO_SUBMITTED; | 1279 | return DM_MAPIO_SUBMITTED; |
1280 | } | 1280 | } |
1281 | 1281 | ||
1282 | static const char *decipher_sync_action(struct mddev *mddev) | ||
1283 | { | ||
1284 | if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) | ||
1285 | return "frozen"; | ||
1286 | |||
1287 | if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || | ||
1288 | (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))) { | ||
1289 | if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) | ||
1290 | return "reshape"; | ||
1291 | |||
1292 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { | ||
1293 | if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) | ||
1294 | return "resync"; | ||
1295 | else if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) | ||
1296 | return "check"; | ||
1297 | return "repair"; | ||
1298 | } | ||
1299 | |||
1300 | if (test_bit(MD_RECOVERY_RECOVER, &mddev->recovery)) | ||
1301 | return "recover"; | ||
1302 | } | ||
1303 | |||
1304 | return "idle"; | ||
1305 | } | ||
1306 | |||
1282 | static void raid_status(struct dm_target *ti, status_type_t type, | 1307 | static void raid_status(struct dm_target *ti, status_type_t type, |
1283 | unsigned status_flags, char *result, unsigned maxlen) | 1308 | unsigned status_flags, char *result, unsigned maxlen) |
1284 | { | 1309 | { |
@@ -1298,8 +1323,18 @@ static void raid_status(struct dm_target *ti, status_type_t type, | |||
1298 | sync = rs->md.recovery_cp; | 1323 | sync = rs->md.recovery_cp; |
1299 | 1324 | ||
1300 | if (sync >= rs->md.resync_max_sectors) { | 1325 | if (sync >= rs->md.resync_max_sectors) { |
1326 | /* | ||
1327 | * Sync complete. | ||
1328 | */ | ||
1301 | array_in_sync = 1; | 1329 | array_in_sync = 1; |
1302 | sync = rs->md.resync_max_sectors; | 1330 | sync = rs->md.resync_max_sectors; |
1331 | } else if (test_bit(MD_RECOVERY_REQUESTED, &rs->md.recovery)) { | ||
1332 | /* | ||
1333 | * If "check" or "repair" is occurring, the array has | ||
1334 | * undergone an initial sync and the health characters | ||
1335 | * should not be 'a' anymore. | ||
1336 | */ | ||
1337 | array_in_sync = 1; | ||
1303 | } else { | 1338 | } else { |
1304 | /* | 1339 | /* |
1305 | * The array may be doing an initial sync, or it may | 1340 | * The array may be doing an initial sync, or it may |
@@ -1311,6 +1346,7 @@ static void raid_status(struct dm_target *ti, status_type_t type, | |||
1311 | if (!test_bit(In_sync, &rs->dev[i].rdev.flags)) | 1346 | if (!test_bit(In_sync, &rs->dev[i].rdev.flags)) |
1312 | array_in_sync = 1; | 1347 | array_in_sync = 1; |
1313 | } | 1348 | } |
1349 | |||
1314 | /* | 1350 | /* |
1315 | * Status characters: | 1351 | * Status characters: |
1316 | * 'D' = Dead/Failed device | 1352 | * 'D' = Dead/Failed device |
@@ -1339,6 +1375,21 @@ static void raid_status(struct dm_target *ti, status_type_t type, | |||
1339 | (unsigned long long) sync, | 1375 | (unsigned long long) sync, |
1340 | (unsigned long long) rs->md.resync_max_sectors); | 1376 | (unsigned long long) rs->md.resync_max_sectors); |
1341 | 1377 | ||
1378 | /* | ||
1379 | * Sync action: | ||
1380 | * See Documentation/device-mapper/dm-raid.txt for | ||
1381 | * information on each of these states. | ||
1382 | */ | ||
1383 | DMEMIT(" %s", decipher_sync_action(&rs->md)); | ||
1384 | |||
1385 | /* | ||
1386 | * resync_mismatches/mismatch_cnt | ||
1387 | * This field shows the number of discrepancies found when | ||
1388 | * performing a "check" of the array. | ||
1389 | */ | ||
1390 | DMEMIT(" %llu", | ||
1391 | (unsigned long long) | ||
1392 | atomic64_read(&rs->md.resync_mismatches)); | ||
1342 | break; | 1393 | break; |
1343 | case STATUSTYPE_TABLE: | 1394 | case STATUSTYPE_TABLE: |
1344 | /* The string you would use to construct this array */ | 1395 | /* The string you would use to construct this array */ |
@@ -1425,7 +1476,62 @@ static void raid_status(struct dm_target *ti, status_type_t type, | |||
1425 | } | 1476 | } |
1426 | } | 1477 | } |
1427 | 1478 | ||
1428 | static int raid_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data) | 1479 | static int raid_message(struct dm_target *ti, unsigned argc, char **argv) |
1480 | { | ||
1481 | struct raid_set *rs = ti->private; | ||
1482 | struct mddev *mddev = &rs->md; | ||
1483 | |||
1484 | if (!strcasecmp(argv[0], "reshape")) { | ||
1485 | DMERR("Reshape not supported."); | ||
1486 | return -EINVAL; | ||
1487 | } | ||
1488 | |||
1489 | if (!mddev->pers || !mddev->pers->sync_request) | ||
1490 | return -EINVAL; | ||
1491 | |||
1492 | if (!strcasecmp(argv[0], "frozen")) | ||
1493 | set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); | ||
1494 | else | ||
1495 | clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); | ||
1496 | |||
1497 | if (!strcasecmp(argv[0], "idle") || !strcasecmp(argv[0], "frozen")) { | ||
1498 | if (mddev->sync_thread) { | ||
1499 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); | ||
1500 | md_reap_sync_thread(mddev); | ||
1501 | } | ||
1502 | } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || | ||
1503 | test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) | ||
1504 | return -EBUSY; | ||
1505 | else if (!strcasecmp(argv[0], "resync")) | ||
1506 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | ||
1507 | else if (!strcasecmp(argv[0], "recover")) { | ||
1508 | set_bit(MD_RECOVERY_RECOVER, &mddev->recovery); | ||
1509 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | ||
1510 | } else { | ||
1511 | if (!strcasecmp(argv[0], "check")) | ||
1512 | set_bit(MD_RECOVERY_CHECK, &mddev->recovery); | ||
1513 | else if (!!strcasecmp(argv[0], "repair")) | ||
1514 | return -EINVAL; | ||
1515 | set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); | ||
1516 | set_bit(MD_RECOVERY_SYNC, &mddev->recovery); | ||
1517 | } | ||
1518 | if (mddev->ro == 2) { | ||
1519 | /* A write to sync_action is enough to justify | ||
1520 | * canceling read-auto mode | ||
1521 | */ | ||
1522 | mddev->ro = 0; | ||
1523 | if (!mddev->suspended) | ||
1524 | md_wakeup_thread(mddev->sync_thread); | ||
1525 | } | ||
1526 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | ||
1527 | if (!mddev->suspended) | ||
1528 | md_wakeup_thread(mddev->thread); | ||
1529 | |||
1530 | return 0; | ||
1531 | } | ||
1532 | |||
1533 | static int raid_iterate_devices(struct dm_target *ti, | ||
1534 | iterate_devices_callout_fn fn, void *data) | ||
1429 | { | 1535 | { |
1430 | struct raid_set *rs = ti->private; | 1536 | struct raid_set *rs = ti->private; |
1431 | unsigned i; | 1537 | unsigned i; |
@@ -1482,12 +1588,13 @@ static void raid_resume(struct dm_target *ti) | |||
1482 | 1588 | ||
1483 | static struct target_type raid_target = { | 1589 | static struct target_type raid_target = { |
1484 | .name = "raid", | 1590 | .name = "raid", |
1485 | .version = {1, 4, 2}, | 1591 | .version = {1, 5, 0}, |
1486 | .module = THIS_MODULE, | 1592 | .module = THIS_MODULE, |
1487 | .ctr = raid_ctr, | 1593 | .ctr = raid_ctr, |
1488 | .dtr = raid_dtr, | 1594 | .dtr = raid_dtr, |
1489 | .map = raid_map, | 1595 | .map = raid_map, |
1490 | .status = raid_status, | 1596 | .status = raid_status, |
1597 | .message = raid_message, | ||
1491 | .iterate_devices = raid_iterate_devices, | 1598 | .iterate_devices = raid_iterate_devices, |
1492 | .io_hints = raid_io_hints, | 1599 | .io_hints = raid_io_hints, |
1493 | .presuspend = raid_presuspend, | 1600 | .presuspend = raid_presuspend, |
diff --git a/drivers/md/md.c b/drivers/md/md.c index aeceedfc530b..4c74424c78b0 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -72,6 +72,9 @@ static DECLARE_WAIT_QUEUE_HEAD(resync_wait); | |||
72 | static struct workqueue_struct *md_wq; | 72 | static struct workqueue_struct *md_wq; |
73 | static struct workqueue_struct *md_misc_wq; | 73 | static struct workqueue_struct *md_misc_wq; |
74 | 74 | ||
75 | static int remove_and_add_spares(struct mddev *mddev, | ||
76 | struct md_rdev *this); | ||
77 | |||
75 | #define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); } | 78 | #define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); } |
76 | 79 | ||
77 | /* | 80 | /* |
@@ -1564,8 +1567,8 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_ | |||
1564 | sector, count, 1) == 0) | 1567 | sector, count, 1) == 0) |
1565 | return -EINVAL; | 1568 | return -EINVAL; |
1566 | } | 1569 | } |
1567 | } else if (sb->bblog_offset == 0) | 1570 | } else if (sb->bblog_offset != 0) |
1568 | rdev->badblocks.shift = -1; | 1571 | rdev->badblocks.shift = 0; |
1569 | 1572 | ||
1570 | if (!refdev) { | 1573 | if (!refdev) { |
1571 | ret = 1; | 1574 | ret = 1; |
@@ -2411,6 +2414,11 @@ static void md_update_sb(struct mddev * mddev, int force_change) | |||
2411 | int nospares = 0; | 2414 | int nospares = 0; |
2412 | int any_badblocks_changed = 0; | 2415 | int any_badblocks_changed = 0; |
2413 | 2416 | ||
2417 | if (mddev->ro) { | ||
2418 | if (force_change) | ||
2419 | set_bit(MD_CHANGE_DEVS, &mddev->flags); | ||
2420 | return; | ||
2421 | } | ||
2414 | repeat: | 2422 | repeat: |
2415 | /* First make sure individual recovery_offsets are correct */ | 2423 | /* First make sure individual recovery_offsets are correct */ |
2416 | rdev_for_each(rdev, mddev) { | 2424 | rdev_for_each(rdev, mddev) { |
@@ -2800,12 +2808,10 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len) | |||
2800 | /* personality does all needed checks */ | 2808 | /* personality does all needed checks */ |
2801 | if (rdev->mddev->pers->hot_remove_disk == NULL) | 2809 | if (rdev->mddev->pers->hot_remove_disk == NULL) |
2802 | return -EINVAL; | 2810 | return -EINVAL; |
2803 | err = rdev->mddev->pers-> | 2811 | clear_bit(Blocked, &rdev->flags); |
2804 | hot_remove_disk(rdev->mddev, rdev); | 2812 | remove_and_add_spares(rdev->mddev, rdev); |
2805 | if (err) | 2813 | if (rdev->raid_disk >= 0) |
2806 | return err; | 2814 | return -EBUSY; |
2807 | sysfs_unlink_rdev(rdev->mddev, rdev); | ||
2808 | rdev->raid_disk = -1; | ||
2809 | set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery); | 2815 | set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery); |
2810 | md_wakeup_thread(rdev->mddev->thread); | 2816 | md_wakeup_thread(rdev->mddev->thread); |
2811 | } else if (rdev->mddev->pers) { | 2817 | } else if (rdev->mddev->pers) { |
@@ -3221,7 +3227,7 @@ int md_rdev_init(struct md_rdev *rdev) | |||
3221 | * be used - I wonder if that matters | 3227 | * be used - I wonder if that matters |
3222 | */ | 3228 | */ |
3223 | rdev->badblocks.count = 0; | 3229 | rdev->badblocks.count = 0; |
3224 | rdev->badblocks.shift = 0; | 3230 | rdev->badblocks.shift = -1; /* disabled until explicitly enabled */ |
3225 | rdev->badblocks.page = kmalloc(PAGE_SIZE, GFP_KERNEL); | 3231 | rdev->badblocks.page = kmalloc(PAGE_SIZE, GFP_KERNEL); |
3226 | seqlock_init(&rdev->badblocks.lock); | 3232 | seqlock_init(&rdev->badblocks.lock); |
3227 | if (rdev->badblocks.page == NULL) | 3233 | if (rdev->badblocks.page == NULL) |
@@ -3293,9 +3299,6 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe | |||
3293 | goto abort_free; | 3299 | goto abort_free; |
3294 | } | 3300 | } |
3295 | } | 3301 | } |
3296 | if (super_format == -1) | ||
3297 | /* hot-add for 0.90, or non-persistent: so no badblocks */ | ||
3298 | rdev->badblocks.shift = -1; | ||
3299 | 3302 | ||
3300 | return rdev; | 3303 | return rdev; |
3301 | 3304 | ||
@@ -4225,8 +4228,6 @@ action_show(struct mddev *mddev, char *page) | |||
4225 | return sprintf(page, "%s\n", type); | 4228 | return sprintf(page, "%s\n", type); |
4226 | } | 4229 | } |
4227 | 4230 | ||
4228 | static void reap_sync_thread(struct mddev *mddev); | ||
4229 | |||
4230 | static ssize_t | 4231 | static ssize_t |
4231 | action_store(struct mddev *mddev, const char *page, size_t len) | 4232 | action_store(struct mddev *mddev, const char *page, size_t len) |
4232 | { | 4233 | { |
@@ -4241,7 +4242,7 @@ action_store(struct mddev *mddev, const char *page, size_t len) | |||
4241 | if (cmd_match(page, "idle") || cmd_match(page, "frozen")) { | 4242 | if (cmd_match(page, "idle") || cmd_match(page, "frozen")) { |
4242 | if (mddev->sync_thread) { | 4243 | if (mddev->sync_thread) { |
4243 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); | 4244 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); |
4244 | reap_sync_thread(mddev); | 4245 | md_reap_sync_thread(mddev); |
4245 | } | 4246 | } |
4246 | } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || | 4247 | } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || |
4247 | test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) | 4248 | test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) |
@@ -5279,7 +5280,7 @@ static void __md_stop_writes(struct mddev *mddev) | |||
5279 | if (mddev->sync_thread) { | 5280 | if (mddev->sync_thread) { |
5280 | set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); | 5281 | set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); |
5281 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); | 5282 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); |
5282 | reap_sync_thread(mddev); | 5283 | md_reap_sync_thread(mddev); |
5283 | } | 5284 | } |
5284 | 5285 | ||
5285 | del_timer_sync(&mddev->safemode_timer); | 5286 | del_timer_sync(&mddev->safemode_timer); |
@@ -5287,7 +5288,8 @@ static void __md_stop_writes(struct mddev *mddev) | |||
5287 | bitmap_flush(mddev); | 5288 | bitmap_flush(mddev); |
5288 | md_super_wait(mddev); | 5289 | md_super_wait(mddev); |
5289 | 5290 | ||
5290 | if (!mddev->in_sync || mddev->flags) { | 5291 | if (mddev->ro == 0 && |
5292 | (!mddev->in_sync || mddev->flags)) { | ||
5291 | /* mark array as shutdown cleanly */ | 5293 | /* mark array as shutdown cleanly */ |
5292 | mddev->in_sync = 1; | 5294 | mddev->in_sync = 1; |
5293 | md_update_sb(mddev, 1); | 5295 | md_update_sb(mddev, 1); |
@@ -5810,7 +5812,7 @@ static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info) | |||
5810 | else | 5812 | else |
5811 | sysfs_notify_dirent_safe(rdev->sysfs_state); | 5813 | sysfs_notify_dirent_safe(rdev->sysfs_state); |
5812 | 5814 | ||
5813 | md_update_sb(mddev, 1); | 5815 | set_bit(MD_CHANGE_DEVS, &mddev->flags); |
5814 | if (mddev->degraded) | 5816 | if (mddev->degraded) |
5815 | set_bit(MD_RECOVERY_RECOVER, &mddev->recovery); | 5817 | set_bit(MD_RECOVERY_RECOVER, &mddev->recovery); |
5816 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 5818 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
@@ -5877,6 +5879,9 @@ static int hot_remove_disk(struct mddev * mddev, dev_t dev) | |||
5877 | if (!rdev) | 5879 | if (!rdev) |
5878 | return -ENXIO; | 5880 | return -ENXIO; |
5879 | 5881 | ||
5882 | clear_bit(Blocked, &rdev->flags); | ||
5883 | remove_and_add_spares(mddev, rdev); | ||
5884 | |||
5880 | if (rdev->raid_disk >= 0) | 5885 | if (rdev->raid_disk >= 0) |
5881 | goto busy; | 5886 | goto busy; |
5882 | 5887 | ||
@@ -6490,6 +6495,28 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, | |||
6490 | err = md_set_readonly(mddev, bdev); | 6495 | err = md_set_readonly(mddev, bdev); |
6491 | goto done_unlock; | 6496 | goto done_unlock; |
6492 | 6497 | ||
6498 | case HOT_REMOVE_DISK: | ||
6499 | err = hot_remove_disk(mddev, new_decode_dev(arg)); | ||
6500 | goto done_unlock; | ||
6501 | |||
6502 | case ADD_NEW_DISK: | ||
6503 | /* We can support ADD_NEW_DISK on read-only arrays | ||
6504 | * only if we are re-adding a preexisting device. | ||
6505 | * So require mddev->pers and MD_DISK_SYNC. | ||
6506 | */ | ||
6507 | if (mddev->pers) { | ||
6508 | mdu_disk_info_t info; | ||
6509 | if (copy_from_user(&info, argp, sizeof(info))) | ||
6510 | err = -EFAULT; | ||
6511 | else if (!(info.state & (1<<MD_DISK_SYNC))) | ||
6512 | /* Need to clear read-only for this */ | ||
6513 | break; | ||
6514 | else | ||
6515 | err = add_new_disk(mddev, &info); | ||
6516 | goto done_unlock; | ||
6517 | } | ||
6518 | break; | ||
6519 | |||
6493 | case BLKROSET: | 6520 | case BLKROSET: |
6494 | if (get_user(ro, (int __user *)(arg))) { | 6521 | if (get_user(ro, (int __user *)(arg))) { |
6495 | err = -EFAULT; | 6522 | err = -EFAULT; |
@@ -6560,10 +6587,6 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, | |||
6560 | goto done_unlock; | 6587 | goto done_unlock; |
6561 | } | 6588 | } |
6562 | 6589 | ||
6563 | case HOT_REMOVE_DISK: | ||
6564 | err = hot_remove_disk(mddev, new_decode_dev(arg)); | ||
6565 | goto done_unlock; | ||
6566 | |||
6567 | case HOT_ADD_DISK: | 6590 | case HOT_ADD_DISK: |
6568 | err = hot_add_disk(mddev, new_decode_dev(arg)); | 6591 | err = hot_add_disk(mddev, new_decode_dev(arg)); |
6569 | goto done_unlock; | 6592 | goto done_unlock; |
@@ -7644,14 +7667,16 @@ void md_do_sync(struct md_thread *thread) | |||
7644 | } | 7667 | } |
7645 | EXPORT_SYMBOL_GPL(md_do_sync); | 7668 | EXPORT_SYMBOL_GPL(md_do_sync); |
7646 | 7669 | ||
7647 | static int remove_and_add_spares(struct mddev *mddev) | 7670 | static int remove_and_add_spares(struct mddev *mddev, |
7671 | struct md_rdev *this) | ||
7648 | { | 7672 | { |
7649 | struct md_rdev *rdev; | 7673 | struct md_rdev *rdev; |
7650 | int spares = 0; | 7674 | int spares = 0; |
7651 | int removed = 0; | 7675 | int removed = 0; |
7652 | 7676 | ||
7653 | rdev_for_each(rdev, mddev) | 7677 | rdev_for_each(rdev, mddev) |
7654 | if (rdev->raid_disk >= 0 && | 7678 | if ((this == NULL || rdev == this) && |
7679 | rdev->raid_disk >= 0 && | ||
7655 | !test_bit(Blocked, &rdev->flags) && | 7680 | !test_bit(Blocked, &rdev->flags) && |
7656 | (test_bit(Faulty, &rdev->flags) || | 7681 | (test_bit(Faulty, &rdev->flags) || |
7657 | ! test_bit(In_sync, &rdev->flags)) && | 7682 | ! test_bit(In_sync, &rdev->flags)) && |
@@ -7666,74 +7691,52 @@ static int remove_and_add_spares(struct mddev *mddev) | |||
7666 | if (removed && mddev->kobj.sd) | 7691 | if (removed && mddev->kobj.sd) |
7667 | sysfs_notify(&mddev->kobj, NULL, "degraded"); | 7692 | sysfs_notify(&mddev->kobj, NULL, "degraded"); |
7668 | 7693 | ||
7694 | if (this) | ||
7695 | goto no_add; | ||
7696 | |||
7669 | rdev_for_each(rdev, mddev) { | 7697 | rdev_for_each(rdev, mddev) { |
7670 | if (rdev->raid_disk >= 0 && | 7698 | if (rdev->raid_disk >= 0 && |
7671 | !test_bit(In_sync, &rdev->flags) && | 7699 | !test_bit(In_sync, &rdev->flags) && |
7672 | !test_bit(Faulty, &rdev->flags)) | 7700 | !test_bit(Faulty, &rdev->flags)) |
7673 | spares++; | 7701 | spares++; |
7674 | if (rdev->raid_disk < 0 | 7702 | if (rdev->raid_disk >= 0) |
7675 | && !test_bit(Faulty, &rdev->flags)) { | 7703 | continue; |
7676 | rdev->recovery_offset = 0; | 7704 | if (test_bit(Faulty, &rdev->flags)) |
7677 | if (mddev->pers-> | 7705 | continue; |
7678 | hot_add_disk(mddev, rdev) == 0) { | 7706 | if (mddev->ro && |
7679 | if (sysfs_link_rdev(mddev, rdev)) | 7707 | rdev->saved_raid_disk < 0) |
7680 | /* failure here is OK */; | 7708 | continue; |
7681 | spares++; | 7709 | |
7682 | md_new_event(mddev); | 7710 | rdev->recovery_offset = 0; |
7683 | set_bit(MD_CHANGE_DEVS, &mddev->flags); | 7711 | if (rdev->saved_raid_disk >= 0 && mddev->in_sync) { |
7684 | } | 7712 | spin_lock_irq(&mddev->write_lock); |
7713 | if (mddev->in_sync) | ||
7714 | /* OK, this device, which is in_sync, | ||
7715 | * will definitely be noticed before | ||
7716 | * the next write, so recovery isn't | ||
7717 | * needed. | ||
7718 | */ | ||
7719 | rdev->recovery_offset = mddev->recovery_cp; | ||
7720 | spin_unlock_irq(&mddev->write_lock); | ||
7721 | } | ||
7722 | if (mddev->ro && rdev->recovery_offset != MaxSector) | ||
7723 | /* not safe to add this disk now */ | ||
7724 | continue; | ||
7725 | if (mddev->pers-> | ||
7726 | hot_add_disk(mddev, rdev) == 0) { | ||
7727 | if (sysfs_link_rdev(mddev, rdev)) | ||
7728 | /* failure here is OK */; | ||
7729 | spares++; | ||
7730 | md_new_event(mddev); | ||
7731 | set_bit(MD_CHANGE_DEVS, &mddev->flags); | ||
7685 | } | 7732 | } |
7686 | } | 7733 | } |
7734 | no_add: | ||
7687 | if (removed) | 7735 | if (removed) |
7688 | set_bit(MD_CHANGE_DEVS, &mddev->flags); | 7736 | set_bit(MD_CHANGE_DEVS, &mddev->flags); |
7689 | return spares; | 7737 | return spares; |
7690 | } | 7738 | } |
7691 | 7739 | ||
7692 | static void reap_sync_thread(struct mddev *mddev) | ||
7693 | { | ||
7694 | struct md_rdev *rdev; | ||
7695 | |||
7696 | /* resync has finished, collect result */ | ||
7697 | md_unregister_thread(&mddev->sync_thread); | ||
7698 | if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) && | ||
7699 | !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { | ||
7700 | /* success...*/ | ||
7701 | /* activate any spares */ | ||
7702 | if (mddev->pers->spare_active(mddev)) { | ||
7703 | sysfs_notify(&mddev->kobj, NULL, | ||
7704 | "degraded"); | ||
7705 | set_bit(MD_CHANGE_DEVS, &mddev->flags); | ||
7706 | } | ||
7707 | } | ||
7708 | if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && | ||
7709 | mddev->pers->finish_reshape) | ||
7710 | mddev->pers->finish_reshape(mddev); | ||
7711 | |||
7712 | /* If array is no-longer degraded, then any saved_raid_disk | ||
7713 | * information must be scrapped. Also if any device is now | ||
7714 | * In_sync we must scrape the saved_raid_disk for that device | ||
7715 | * do the superblock for an incrementally recovered device | ||
7716 | * written out. | ||
7717 | */ | ||
7718 | rdev_for_each(rdev, mddev) | ||
7719 | if (!mddev->degraded || | ||
7720 | test_bit(In_sync, &rdev->flags)) | ||
7721 | rdev->saved_raid_disk = -1; | ||
7722 | |||
7723 | md_update_sb(mddev, 1); | ||
7724 | clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery); | ||
7725 | clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); | ||
7726 | clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); | ||
7727 | clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); | ||
7728 | clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); | ||
7729 | /* flag recovery needed just to double check */ | ||
7730 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | ||
7731 | sysfs_notify_dirent_safe(mddev->sysfs_action); | ||
7732 | md_new_event(mddev); | ||
7733 | if (mddev->event_work.func) | ||
7734 | queue_work(md_misc_wq, &mddev->event_work); | ||
7735 | } | ||
7736 | |||
7737 | /* | 7740 | /* |
7738 | * This routine is regularly called by all per-raid-array threads to | 7741 | * This routine is regularly called by all per-raid-array threads to |
7739 | * deal with generic issues like resync and super-block update. | 7742 | * deal with generic issues like resync and super-block update. |
@@ -7789,22 +7792,16 @@ void md_check_recovery(struct mddev *mddev) | |||
7789 | int spares = 0; | 7792 | int spares = 0; |
7790 | 7793 | ||
7791 | if (mddev->ro) { | 7794 | if (mddev->ro) { |
7792 | /* Only thing we do on a ro array is remove | 7795 | /* On a read-only array we can: |
7793 | * failed devices. | 7796 | * - remove failed devices |
7797 | * - add already-in_sync devices if the array itself | ||
7798 | * is in-sync. | ||
7799 | * As we only add devices that are already in-sync, | ||
7800 | * we can activate the spares immediately. | ||
7794 | */ | 7801 | */ |
7795 | struct md_rdev *rdev; | ||
7796 | rdev_for_each(rdev, mddev) | ||
7797 | if (rdev->raid_disk >= 0 && | ||
7798 | !test_bit(Blocked, &rdev->flags) && | ||
7799 | test_bit(Faulty, &rdev->flags) && | ||
7800 | atomic_read(&rdev->nr_pending)==0) { | ||
7801 | if (mddev->pers->hot_remove_disk( | ||
7802 | mddev, rdev) == 0) { | ||
7803 | sysfs_unlink_rdev(mddev, rdev); | ||
7804 | rdev->raid_disk = -1; | ||
7805 | } | ||
7806 | } | ||
7807 | clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 7802 | clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
7803 | remove_and_add_spares(mddev, NULL); | ||
7804 | mddev->pers->spare_active(mddev); | ||
7808 | goto unlock; | 7805 | goto unlock; |
7809 | } | 7806 | } |
7810 | 7807 | ||
@@ -7836,7 +7833,7 @@ void md_check_recovery(struct mddev *mddev) | |||
7836 | goto unlock; | 7833 | goto unlock; |
7837 | } | 7834 | } |
7838 | if (mddev->sync_thread) { | 7835 | if (mddev->sync_thread) { |
7839 | reap_sync_thread(mddev); | 7836 | md_reap_sync_thread(mddev); |
7840 | goto unlock; | 7837 | goto unlock; |
7841 | } | 7838 | } |
7842 | /* Set RUNNING before clearing NEEDED to avoid | 7839 | /* Set RUNNING before clearing NEEDED to avoid |
@@ -7867,7 +7864,7 @@ void md_check_recovery(struct mddev *mddev) | |||
7867 | goto unlock; | 7864 | goto unlock; |
7868 | set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); | 7865 | set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); |
7869 | clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery); | 7866 | clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery); |
7870 | } else if ((spares = remove_and_add_spares(mddev))) { | 7867 | } else if ((spares = remove_and_add_spares(mddev, NULL))) { |
7871 | clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); | 7868 | clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); |
7872 | clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); | 7869 | clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); |
7873 | clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); | 7870 | clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); |
@@ -7917,6 +7914,51 @@ void md_check_recovery(struct mddev *mddev) | |||
7917 | } | 7914 | } |
7918 | } | 7915 | } |
7919 | 7916 | ||
7917 | void md_reap_sync_thread(struct mddev *mddev) | ||
7918 | { | ||
7919 | struct md_rdev *rdev; | ||
7920 | |||
7921 | /* resync has finished, collect result */ | ||
7922 | md_unregister_thread(&mddev->sync_thread); | ||
7923 | if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) && | ||
7924 | !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { | ||
7925 | /* success...*/ | ||
7926 | /* activate any spares */ | ||
7927 | if (mddev->pers->spare_active(mddev)) { | ||
7928 | sysfs_notify(&mddev->kobj, NULL, | ||
7929 | "degraded"); | ||
7930 | set_bit(MD_CHANGE_DEVS, &mddev->flags); | ||
7931 | } | ||
7932 | } | ||
7933 | if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && | ||
7934 | mddev->pers->finish_reshape) | ||
7935 | mddev->pers->finish_reshape(mddev); | ||
7936 | |||
7937 | /* If array is no-longer degraded, then any saved_raid_disk | ||
7938 | * information must be scrapped. Also if any device is now | ||
7939 | * In_sync we must scrap the saved_raid_disk for that device | ||
7940 | * so the superblock for an incrementally recovered device | ||
7941 | * is written out. | ||
7942 | */ | ||
7943 | rdev_for_each(rdev, mddev) | ||
7944 | if (!mddev->degraded || | ||
7945 | test_bit(In_sync, &rdev->flags)) | ||
7946 | rdev->saved_raid_disk = -1; | ||
7947 | |||
7948 | md_update_sb(mddev, 1); | ||
7949 | clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery); | ||
7950 | clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); | ||
7951 | clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); | ||
7952 | clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); | ||
7953 | clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); | ||
7954 | /* flag recovery needed just to double check */ | ||
7955 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | ||
7956 | sysfs_notify_dirent_safe(mddev->sysfs_action); | ||
7957 | md_new_event(mddev); | ||
7958 | if (mddev->event_work.func) | ||
7959 | queue_work(md_misc_wq, &mddev->event_work); | ||
7960 | } | ||
7961 | |||
7920 | void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev) | 7962 | void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev) |
7921 | { | 7963 | { |
7922 | sysfs_notify_dirent_safe(rdev->sysfs_state); | 7964 | sysfs_notify_dirent_safe(rdev->sysfs_state); |
@@ -8642,6 +8684,7 @@ EXPORT_SYMBOL(md_register_thread); | |||
8642 | EXPORT_SYMBOL(md_unregister_thread); | 8684 | EXPORT_SYMBOL(md_unregister_thread); |
8643 | EXPORT_SYMBOL(md_wakeup_thread); | 8685 | EXPORT_SYMBOL(md_wakeup_thread); |
8644 | EXPORT_SYMBOL(md_check_recovery); | 8686 | EXPORT_SYMBOL(md_check_recovery); |
8687 | EXPORT_SYMBOL(md_reap_sync_thread); | ||
8645 | MODULE_LICENSE("GPL"); | 8688 | MODULE_LICENSE("GPL"); |
8646 | MODULE_DESCRIPTION("MD RAID framework"); | 8689 | MODULE_DESCRIPTION("MD RAID framework"); |
8647 | MODULE_ALIAS("md"); | 8690 | MODULE_ALIAS("md"); |
diff --git a/drivers/md/md.h b/drivers/md/md.h index d90fb1a879e1..653f992b687a 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h | |||
@@ -567,6 +567,7 @@ extern struct md_thread *md_register_thread( | |||
567 | extern void md_unregister_thread(struct md_thread **threadp); | 567 | extern void md_unregister_thread(struct md_thread **threadp); |
568 | extern void md_wakeup_thread(struct md_thread *thread); | 568 | extern void md_wakeup_thread(struct md_thread *thread); |
569 | extern void md_check_recovery(struct mddev *mddev); | 569 | extern void md_check_recovery(struct mddev *mddev); |
570 | extern void md_reap_sync_thread(struct mddev *mddev); | ||
570 | extern void md_write_start(struct mddev *mddev, struct bio *bi); | 571 | extern void md_write_start(struct mddev *mddev, struct bio *bi); |
571 | extern void md_write_end(struct mddev *mddev); | 572 | extern void md_write_end(struct mddev *mddev); |
572 | extern void md_done_sync(struct mddev *mddev, int blocks, int ok); | 573 | extern void md_done_sync(struct mddev *mddev, int blocks, int ok); |
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index fd86b372692d..851023e2ba5d 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
@@ -981,7 +981,12 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule) | |||
981 | while (bio) { /* submit pending writes */ | 981 | while (bio) { /* submit pending writes */ |
982 | struct bio *next = bio->bi_next; | 982 | struct bio *next = bio->bi_next; |
983 | bio->bi_next = NULL; | 983 | bio->bi_next = NULL; |
984 | generic_make_request(bio); | 984 | if (unlikely((bio->bi_rw & REQ_DISCARD) && |
985 | !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) | ||
986 | /* Just ignore it */ | ||
987 | bio_endio(bio, 0); | ||
988 | else | ||
989 | generic_make_request(bio); | ||
985 | bio = next; | 990 | bio = next; |
986 | } | 991 | } |
987 | kfree(plug); | 992 | kfree(plug); |
@@ -2901,6 +2906,7 @@ static int stop(struct mddev *mddev) | |||
2901 | if (conf->r1bio_pool) | 2906 | if (conf->r1bio_pool) |
2902 | mempool_destroy(conf->r1bio_pool); | 2907 | mempool_destroy(conf->r1bio_pool); |
2903 | kfree(conf->mirrors); | 2908 | kfree(conf->mirrors); |
2909 | safe_put_page(conf->tmppage); | ||
2904 | kfree(conf->poolinfo); | 2910 | kfree(conf->poolinfo); |
2905 | kfree(conf); | 2911 | kfree(conf); |
2906 | mddev->private = NULL; | 2912 | mddev->private = NULL; |
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 77b562d18a90..018741ba9310 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -1133,7 +1133,12 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule) | |||
1133 | while (bio) { /* submit pending writes */ | 1133 | while (bio) { /* submit pending writes */ |
1134 | struct bio *next = bio->bi_next; | 1134 | struct bio *next = bio->bi_next; |
1135 | bio->bi_next = NULL; | 1135 | bio->bi_next = NULL; |
1136 | generic_make_request(bio); | 1136 | if (unlikely((bio->bi_rw & REQ_DISCARD) && |
1137 | !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) | ||
1138 | /* Just ignore it */ | ||
1139 | bio_endio(bio, 0); | ||
1140 | else | ||
1141 | generic_make_request(bio); | ||
1137 | bio = next; | 1142 | bio = next; |
1138 | } | 1143 | } |
1139 | kfree(plug); | 1144 | kfree(plug); |
@@ -2913,6 +2918,22 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, | |||
2913 | if (init_resync(conf)) | 2918 | if (init_resync(conf)) |
2914 | return 0; | 2919 | return 0; |
2915 | 2920 | ||
2921 | /* | ||
2922 | * Allow skipping a full rebuild for incremental assembly | ||
2923 | * of a clean array, like RAID1 does. | ||
2924 | */ | ||
2925 | if (mddev->bitmap == NULL && | ||
2926 | mddev->recovery_cp == MaxSector && | ||
2927 | !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) && | ||
2928 | conf->fullsync == 0) { | ||
2929 | *skipped = 1; | ||
2930 | max_sector = mddev->dev_sectors; | ||
2931 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) || | ||
2932 | test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) | ||
2933 | max_sector = mddev->resync_max_sectors; | ||
2934 | return max_sector - sector_nr; | ||
2935 | } | ||
2936 | |||
2916 | skipped: | 2937 | skipped: |
2917 | max_sector = mddev->dev_sectors; | 2938 | max_sector = mddev->dev_sectors; |
2918 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) || | 2939 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) || |
@@ -3810,6 +3831,7 @@ static int stop(struct mddev *mddev) | |||
3810 | 3831 | ||
3811 | if (conf->r10bio_pool) | 3832 | if (conf->r10bio_pool) |
3812 | mempool_destroy(conf->r10bio_pool); | 3833 | mempool_destroy(conf->r10bio_pool); |
3834 | safe_put_page(conf->tmppage); | ||
3813 | kfree(conf->mirrors); | 3835 | kfree(conf->mirrors); |
3814 | kfree(conf); | 3836 | kfree(conf); |
3815 | mddev->private = NULL; | 3837 | mddev->private = NULL; |
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index f4e87bfc7567..4a7be455d6d8 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -1887,8 +1887,15 @@ static void raid5_end_write_request(struct bio *bi, int error) | |||
1887 | &rdev->mddev->recovery); | 1887 | &rdev->mddev->recovery); |
1888 | } else if (is_badblock(rdev, sh->sector, | 1888 | } else if (is_badblock(rdev, sh->sector, |
1889 | STRIPE_SECTORS, | 1889 | STRIPE_SECTORS, |
1890 | &first_bad, &bad_sectors)) | 1890 | &first_bad, &bad_sectors)) { |
1891 | set_bit(R5_MadeGood, &sh->dev[i].flags); | 1891 | set_bit(R5_MadeGood, &sh->dev[i].flags); |
1892 | if (test_bit(R5_ReadError, &sh->dev[i].flags)) | ||
1893 | /* That was a successful write so make | ||
1894 | * sure it looks like we already did | ||
1895 | * a re-write. | ||
1896 | */ | ||
1897 | set_bit(R5_ReWrite, &sh->dev[i].flags); | ||
1898 | } | ||
1892 | } | 1899 | } |
1893 | rdev_dec_pending(rdev, conf->mddev); | 1900 | rdev_dec_pending(rdev, conf->mddev); |
1894 | 1901 | ||
@@ -4672,9 +4679,10 @@ static inline sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int | |||
4672 | *skipped = 1; | 4679 | *skipped = 1; |
4673 | return rv; | 4680 | return rv; |
4674 | } | 4681 | } |
4675 | if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) && | 4682 | if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) && |
4676 | !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) && | 4683 | !conf->fullsync && |
4677 | !conf->fullsync && sync_blocks >= STRIPE_SECTORS) { | 4684 | !bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) && |
4685 | sync_blocks >= STRIPE_SECTORS) { | ||
4678 | /* we can skip this block, and probably more */ | 4686 | /* we can skip this block, and probably more */ |
4679 | sync_blocks /= STRIPE_SECTORS; | 4687 | sync_blocks /= STRIPE_SECTORS; |
4680 | *skipped = 1; | 4688 | *skipped = 1; |