aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/device-mapper/dm-raid.txt84
-rw-r--r--Documentation/md.txt16
-rw-r--r--drivers/md/bitmap.c4
-rw-r--r--drivers/md/dm-raid.c111
-rw-r--r--drivers/md/md.c235
-rw-r--r--drivers/md/md.h1
-rw-r--r--drivers/md/raid1.c8
-rw-r--r--drivers/md/raid10.c24
-rw-r--r--drivers/md/raid5.c16
9 files changed, 368 insertions, 131 deletions
diff --git a/Documentation/device-mapper/dm-raid.txt b/Documentation/device-mapper/dm-raid.txt
index b428556197c9..e9192283e5a5 100644
--- a/Documentation/device-mapper/dm-raid.txt
+++ b/Documentation/device-mapper/dm-raid.txt
@@ -1,10 +1,13 @@
1dm-raid 1dm-raid
2------- 2=======
3 3
4The device-mapper RAID (dm-raid) target provides a bridge from DM to MD. 4The device-mapper RAID (dm-raid) target provides a bridge from DM to MD.
5It allows the MD RAID drivers to be accessed using a device-mapper 5It allows the MD RAID drivers to be accessed using a device-mapper
6interface. 6interface.
7 7
8
9Mapping Table Interface
10-----------------------
8The target is named "raid" and it accepts the following parameters: 11The target is named "raid" and it accepts the following parameters:
9 12
10 <raid_type> <#raid_params> <raid_params> \ 13 <raid_type> <#raid_params> <raid_params> \
@@ -47,7 +50,7 @@ The target is named "raid" and it accepts the following parameters:
47 followed by optional parameters (in any order): 50 followed by optional parameters (in any order):
48 [sync|nosync] Force or prevent RAID initialization. 51 [sync|nosync] Force or prevent RAID initialization.
49 52
50 [rebuild <idx>] Rebuild drive number idx (first drive is 0). 53 [rebuild <idx>] Rebuild drive number 'idx' (first drive is 0).
51 54
52 [daemon_sleep <ms>] 55 [daemon_sleep <ms>]
53 Interval between runs of the bitmap daemon that 56 Interval between runs of the bitmap daemon that
@@ -56,9 +59,9 @@ The target is named "raid" and it accepts the following parameters:
56 59
57 [min_recovery_rate <kB/sec/disk>] Throttle RAID initialization 60 [min_recovery_rate <kB/sec/disk>] Throttle RAID initialization
58 [max_recovery_rate <kB/sec/disk>] Throttle RAID initialization 61 [max_recovery_rate <kB/sec/disk>] Throttle RAID initialization
59 [write_mostly <idx>] Drive index is write-mostly 62 [write_mostly <idx>] Mark drive index 'idx' write-mostly.
60 [max_write_behind <sectors>] See '-write-behind=' (man mdadm) 63 [max_write_behind <sectors>] See '--write-behind=' (man mdadm)
61 [stripe_cache <sectors>] Stripe cache size (higher RAIDs only) 64 [stripe_cache <sectors>] Stripe cache size (RAID 4/5/6 only)
62 [region_size <sectors>] 65 [region_size <sectors>]
63 The region_size multiplied by the number of regions is the 66 The region_size multiplied by the number of regions is the
64 logical size of the array. The bitmap records the device 67 logical size of the array. The bitmap records the device
@@ -122,7 +125,7 @@ The target is named "raid" and it accepts the following parameters:
122 given for both the metadata and data drives for a given position. 125 given for both the metadata and data drives for a given position.
123 126
124 127
125Example tables 128Example Tables
126-------------- 129--------------
127# RAID4 - 4 data drives, 1 parity (no metadata devices) 130# RAID4 - 4 data drives, 1 parity (no metadata devices)
128# No metadata devices specified to hold superblock/bitmap info 131# No metadata devices specified to hold superblock/bitmap info
@@ -141,26 +144,70 @@ Example tables
141 raid4 4 2048 sync min_recovery_rate 20 \ 144 raid4 4 2048 sync min_recovery_rate 20 \
142 5 8:17 8:18 8:33 8:34 8:49 8:50 8:65 8:66 8:81 8:82 145 5 8:17 8:18 8:33 8:34 8:49 8:50 8:65 8:66 8:81 8:82
143 146
147
148Status Output
149-------------
144'dmsetup table' displays the table used to construct the mapping. 150'dmsetup table' displays the table used to construct the mapping.
145The optional parameters are always printed in the order listed 151The optional parameters are always printed in the order listed
146above with "sync" or "nosync" always output ahead of the other 152above with "sync" or "nosync" always output ahead of the other
147arguments, regardless of the order used when originally loading the table. 153arguments, regardless of the order used when originally loading the table.
148Arguments that can be repeated are ordered by value. 154Arguments that can be repeated are ordered by value.
149 155
150'dmsetup status' yields information on the state and health of the 156
151array. 157'dmsetup status' yields information on the state and health of the array.
152The output is as follows: 158The output is as follows (normally a single line, but expanded here for
159clarity):
1531: <s> <l> raid \ 1601: <s> <l> raid \
1542: <raid_type> <#devices> <1 health char for each dev> <resync_ratio> 1612: <raid_type> <#devices> <health_chars> \
1623: <sync_ratio> <sync_action> <mismatch_cnt>
155 163
156Line 1 is the standard output produced by device-mapper. 164Line 1 is the standard output produced by device-mapper.
157Line 2 is produced by the raid target, and best explained by example: 165Lines 2 & 3 are produced by the raid target and are best explained by example:
158 0 1960893648 raid raid4 5 AAAAA 2/490221568 166 0 1960893648 raid raid4 5 AAAAA 2/490221568 init 0
159Here we can see the RAID type is raid4, there are 5 devices - all of 167Here we can see the RAID type is raid4, there are 5 devices - all of
160which are 'A'live, and the array is 2/490221568 complete with recovery. 168which are 'A'live, and the array is 2/490221568 complete with its initial
161Faulty or missing devices are marked 'D'. Devices that are out-of-sync 169recovery. Here is a fuller description of the individual fields:
162are marked 'a'. 170 <raid_type> Same as the <raid_type> used to create the array.
163 171 <health_chars> One char for each device, indicating: 'A' = alive and
172 in-sync, 'a' = alive but not in-sync, 'D' = dead/failed.
173 <sync_ratio> The ratio indicating how much of the array has undergone
174 the process described by 'sync_action'. If the
175 'sync_action' is "check" or "repair", then the process
176 of "resync" or "recover" can be considered complete.
177 <sync_action> One of the following possible states:
178 idle - No synchronization action is being performed.
179 frozen - The current action has been halted.
180 resync - Array is undergoing its initial synchronization
181 or is resynchronizing after an unclean shutdown
182 (possibly aided by a bitmap).
183 recover - A device in the array is being rebuilt or
184 replaced.
185 check - A user-initiated full check of the array is
186 being performed. All blocks are read and
187 checked for consistency. The number of
188 discrepancies found are recorded in
189 <mismatch_cnt>. No changes are made to the
190 array by this action.
191 repair - The same as "check", but discrepancies are
192 corrected.
193 reshape - The array is undergoing a reshape.
194 <mismatch_cnt> The number of discrepancies found between mirror copies
195 in RAID1/10 or wrong parity values found in RAID4/5/6.
196 This value is valid only after a "check" of the array
197 is performed. A healthy array has a 'mismatch_cnt' of 0.
198
199Message Interface
200-----------------
201The dm-raid target will accept certain actions through the 'message' interface.
202('man dmsetup' for more information on the message interface.) These actions
203include:
204 "idle" - Halt the current sync action.
205 "frozen" - Freeze the current sync action.
206 "resync" - Initiate/continue a resync.
207 "recover"- Initiate/continue a recover process.
208 "check" - Initiate a check (i.e. a "scrub") of the array.
209 "repair" - Initiate a repair of the array.
210 "reshape"- Currently unsupported (-EINVAL).
164 211
165Version History 212Version History
166--------------- 213---------------
@@ -171,4 +218,7 @@ Version History
1711.3.1 Allow device replacement/rebuild for RAID 10 2181.3.1 Allow device replacement/rebuild for RAID 10
1721.3.2 Fix/improve redundancy checking for RAID10 2191.3.2 Fix/improve redundancy checking for RAID10
1731.4.0 Non-functional change. Removes arg from mapping function. 2201.4.0 Non-functional change. Removes arg from mapping function.
1741.4.1 Add RAID10 "far" and "offset" algorithm support. 2211.4.1 RAID10 fix redundancy validation checks (commit 55ebbb5).
2221.4.2 Add RAID10 "far" and "offset" algorithm support.
2231.5.0 Add message interface to allow manipulation of the sync_action.
224 New status (STATUSTYPE_INFO) fields: sync_action and mismatch_cnt.
diff --git a/Documentation/md.txt b/Documentation/md.txt
index 993fba37b7d1..e0ddd327632d 100644
--- a/Documentation/md.txt
+++ b/Documentation/md.txt
@@ -119,7 +119,7 @@ device to add.
119The array is started with the RUN_ARRAY ioctl. 119The array is started with the RUN_ARRAY ioctl.
120 120
121Once started, new devices can be added. They should have an 121Once started, new devices can be added. They should have an
122appropriate superblock written to them, and then passed be in with 122appropriate superblock written to them, and then be passed in with
123ADD_NEW_DISK. 123ADD_NEW_DISK.
124 124
125Devices that have failed or are not yet active can be detached from an 125Devices that have failed or are not yet active can be detached from an
@@ -131,7 +131,7 @@ Specific Rules that apply to format-0 super block arrays, and
131------------------------------------------------------------- 131-------------------------------------------------------------
132 132
133An array can be 'created' by describing the array (level, chunksize 133An array can be 'created' by describing the array (level, chunksize
134etc) in a SET_ARRAY_INFO ioctl. This must has major_version==0 and 134etc) in a SET_ARRAY_INFO ioctl. This must have major_version==0 and
135raid_disks != 0. 135raid_disks != 0.
136 136
137Then uninitialized devices can be added with ADD_NEW_DISK. The 137Then uninitialized devices can be added with ADD_NEW_DISK. The
@@ -426,7 +426,7 @@ Each directory contains:
426 offset 426 offset
427 This gives the location in the device (in sectors from the 427 This gives the location in the device (in sectors from the
428 start) where data from the array will be stored. Any part of 428 start) where data from the array will be stored. Any part of
429 the device before this offset us not touched, unless it is 429 the device before this offset is not touched, unless it is
430 used for storing metadata (Formats 1.1 and 1.2). 430 used for storing metadata (Formats 1.1 and 1.2).
431 431
432 size 432 size
@@ -440,7 +440,7 @@ Each directory contains:
440 When the device is not 'in_sync', this records the number of 440 When the device is not 'in_sync', this records the number of
441 sectors from the start of the device which are known to be 441 sectors from the start of the device which are known to be
442 correct. This is normally zero, but during a recovery 442 correct. This is normally zero, but during a recovery
443 operation is will steadily increase, and if the recovery is 443 operation it will steadily increase, and if the recovery is
444 interrupted, restoring this value can cause recovery to 444 interrupted, restoring this value can cause recovery to
445 avoid repeating the earlier blocks. With v1.x metadata, this 445 avoid repeating the earlier blocks. With v1.x metadata, this
446 value is saved and restored automatically. 446 value is saved and restored automatically.
@@ -468,7 +468,7 @@ Each directory contains:
468 468
469 469
470 470
471An active md device will also contain and entry for each active device 471An active md device will also contain an entry for each active device
472in the array. These are named 472in the array. These are named
473 473
474 rdNN 474 rdNN
@@ -482,7 +482,7 @@ will show 'in_sync' on every line.
482 482
483 483
484 484
485Active md devices for levels that support data redundancy (1,4,5,6) 485Active md devices for levels that support data redundancy (1,4,5,6,10)
486also have 486also have
487 487
488 sync_action 488 sync_action
@@ -494,7 +494,7 @@ also have
494 failed/missing device 494 failed/missing device
495 idle - nothing is happening 495 idle - nothing is happening
496 check - A full check of redundancy was requested and is 496 check - A full check of redundancy was requested and is
497 happening. This reads all block and checks 497 happening. This reads all blocks and checks
498 them. A repair may also happen for some raid 498 them. A repair may also happen for some raid
499 levels. 499 levels.
500 repair - A full check and repair is happening. This is 500 repair - A full check and repair is happening. This is
@@ -522,7 +522,7 @@ also have
522 522
523 degraded 523 degraded
524 This contains a count of the number of devices by which the 524 This contains a count of the number of devices by which the
525 arrays is degraded. So an optimal array with show '0'. A 525 array is degraded. So an optimal array will show '0'. A
526 single failed/missing drive will show '1', etc. 526 single failed/missing drive will show '1', etc.
527 This file responds to select/poll, any increase or decrease 527 This file responds to select/poll, any increase or decrease
528 in the count of missing devices will trigger an event. 528 in the count of missing devices will trigger an event.
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 4fd9d6aeff6a..5a2c75499824 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -846,7 +846,7 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
846 if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags)) 846 if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
847 set_bit(bit, kaddr); 847 set_bit(bit, kaddr);
848 else 848 else
849 test_and_set_bit_le(bit, kaddr); 849 set_bit_le(bit, kaddr);
850 kunmap_atomic(kaddr); 850 kunmap_atomic(kaddr);
851 pr_debug("set file bit %lu page %lu\n", bit, page->index); 851 pr_debug("set file bit %lu page %lu\n", bit, page->index);
852 /* record page number so it gets flushed to disk when unplug occurs */ 852 /* record page number so it gets flushed to disk when unplug occurs */
@@ -868,7 +868,7 @@ static void bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
868 if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags)) 868 if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
869 clear_bit(bit, paddr); 869 clear_bit(bit, paddr);
870 else 870 else
871 test_and_clear_bit_le(bit, paddr); 871 clear_bit_le(bit, paddr);
872 kunmap_atomic(paddr); 872 kunmap_atomic(paddr);
873 if (!test_page_attr(bitmap, page->index, BITMAP_PAGE_NEEDWRITE)) { 873 if (!test_page_attr(bitmap, page->index, BITMAP_PAGE_NEEDWRITE)) {
874 set_page_attr(bitmap, page->index, BITMAP_PAGE_PENDING); 874 set_page_attr(bitmap, page->index, BITMAP_PAGE_PENDING);
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 311e3d35b272..1d3fe1a40a9b 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -1279,6 +1279,31 @@ static int raid_map(struct dm_target *ti, struct bio *bio)
1279 return DM_MAPIO_SUBMITTED; 1279 return DM_MAPIO_SUBMITTED;
1280} 1280}
1281 1281
1282static const char *decipher_sync_action(struct mddev *mddev)
1283{
1284 if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
1285 return "frozen";
1286
1287 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
1288 (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))) {
1289 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
1290 return "reshape";
1291
1292 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
1293 if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
1294 return "resync";
1295 else if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
1296 return "check";
1297 return "repair";
1298 }
1299
1300 if (test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
1301 return "recover";
1302 }
1303
1304 return "idle";
1305}
1306
1282static void raid_status(struct dm_target *ti, status_type_t type, 1307static void raid_status(struct dm_target *ti, status_type_t type,
1283 unsigned status_flags, char *result, unsigned maxlen) 1308 unsigned status_flags, char *result, unsigned maxlen)
1284{ 1309{
@@ -1298,8 +1323,18 @@ static void raid_status(struct dm_target *ti, status_type_t type,
1298 sync = rs->md.recovery_cp; 1323 sync = rs->md.recovery_cp;
1299 1324
1300 if (sync >= rs->md.resync_max_sectors) { 1325 if (sync >= rs->md.resync_max_sectors) {
1326 /*
1327 * Sync complete.
1328 */
1301 array_in_sync = 1; 1329 array_in_sync = 1;
1302 sync = rs->md.resync_max_sectors; 1330 sync = rs->md.resync_max_sectors;
1331 } else if (test_bit(MD_RECOVERY_REQUESTED, &rs->md.recovery)) {
1332 /*
1333 * If "check" or "repair" is occurring, the array has
1334 * undergone an initial sync and the health characters
1335 * should not be 'a' anymore.
1336 */
1337 array_in_sync = 1;
1303 } else { 1338 } else {
1304 /* 1339 /*
1305 * The array may be doing an initial sync, or it may 1340 * The array may be doing an initial sync, or it may
@@ -1311,6 +1346,7 @@ static void raid_status(struct dm_target *ti, status_type_t type,
1311 if (!test_bit(In_sync, &rs->dev[i].rdev.flags)) 1346 if (!test_bit(In_sync, &rs->dev[i].rdev.flags))
1312 array_in_sync = 1; 1347 array_in_sync = 1;
1313 } 1348 }
1349
1314 /* 1350 /*
1315 * Status characters: 1351 * Status characters:
1316 * 'D' = Dead/Failed device 1352 * 'D' = Dead/Failed device
@@ -1339,6 +1375,21 @@ static void raid_status(struct dm_target *ti, status_type_t type,
1339 (unsigned long long) sync, 1375 (unsigned long long) sync,
1340 (unsigned long long) rs->md.resync_max_sectors); 1376 (unsigned long long) rs->md.resync_max_sectors);
1341 1377
1378 /*
1379 * Sync action:
1380 * See Documentation/device-mapper/dm-raid.txt for
1381 * information on each of these states.
1382 */
1383 DMEMIT(" %s", decipher_sync_action(&rs->md));
1384
1385 /*
1386 * resync_mismatches/mismatch_cnt
1387 * This field shows the number of discrepancies found when
1388 * performing a "check" of the array.
1389 */
1390 DMEMIT(" %llu",
1391 (unsigned long long)
1392 atomic64_read(&rs->md.resync_mismatches));
1342 break; 1393 break;
1343 case STATUSTYPE_TABLE: 1394 case STATUSTYPE_TABLE:
1344 /* The string you would use to construct this array */ 1395 /* The string you would use to construct this array */
@@ -1425,7 +1476,62 @@ static void raid_status(struct dm_target *ti, status_type_t type,
1425 } 1476 }
1426} 1477}
1427 1478
1428static int raid_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data) 1479static int raid_message(struct dm_target *ti, unsigned argc, char **argv)
1480{
1481 struct raid_set *rs = ti->private;
1482 struct mddev *mddev = &rs->md;
1483
1484 if (!strcasecmp(argv[0], "reshape")) {
1485 DMERR("Reshape not supported.");
1486 return -EINVAL;
1487 }
1488
1489 if (!mddev->pers || !mddev->pers->sync_request)
1490 return -EINVAL;
1491
1492 if (!strcasecmp(argv[0], "frozen"))
1493 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
1494 else
1495 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
1496
1497 if (!strcasecmp(argv[0], "idle") || !strcasecmp(argv[0], "frozen")) {
1498 if (mddev->sync_thread) {
1499 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
1500 md_reap_sync_thread(mddev);
1501 }
1502 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
1503 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
1504 return -EBUSY;
1505 else if (!strcasecmp(argv[0], "resync"))
1506 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
1507 else if (!strcasecmp(argv[0], "recover")) {
1508 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
1509 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
1510 } else {
1511 if (!strcasecmp(argv[0], "check"))
1512 set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
1513 else if (!!strcasecmp(argv[0], "repair"))
1514 return -EINVAL;
1515 set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
1516 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
1517 }
1518 if (mddev->ro == 2) {
1519 /* A write to sync_action is enough to justify
1520 * canceling read-auto mode
1521 */
1522 mddev->ro = 0;
1523 if (!mddev->suspended)
1524 md_wakeup_thread(mddev->sync_thread);
1525 }
1526 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
1527 if (!mddev->suspended)
1528 md_wakeup_thread(mddev->thread);
1529
1530 return 0;
1531}
1532
1533static int raid_iterate_devices(struct dm_target *ti,
1534 iterate_devices_callout_fn fn, void *data)
1429{ 1535{
1430 struct raid_set *rs = ti->private; 1536 struct raid_set *rs = ti->private;
1431 unsigned i; 1537 unsigned i;
@@ -1482,12 +1588,13 @@ static void raid_resume(struct dm_target *ti)
1482 1588
1483static struct target_type raid_target = { 1589static struct target_type raid_target = {
1484 .name = "raid", 1590 .name = "raid",
1485 .version = {1, 4, 2}, 1591 .version = {1, 5, 0},
1486 .module = THIS_MODULE, 1592 .module = THIS_MODULE,
1487 .ctr = raid_ctr, 1593 .ctr = raid_ctr,
1488 .dtr = raid_dtr, 1594 .dtr = raid_dtr,
1489 .map = raid_map, 1595 .map = raid_map,
1490 .status = raid_status, 1596 .status = raid_status,
1597 .message = raid_message,
1491 .iterate_devices = raid_iterate_devices, 1598 .iterate_devices = raid_iterate_devices,
1492 .io_hints = raid_io_hints, 1599 .io_hints = raid_io_hints,
1493 .presuspend = raid_presuspend, 1600 .presuspend = raid_presuspend,
diff --git a/drivers/md/md.c b/drivers/md/md.c
index aeceedfc530b..4c74424c78b0 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -72,6 +72,9 @@ static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
72static struct workqueue_struct *md_wq; 72static struct workqueue_struct *md_wq;
73static struct workqueue_struct *md_misc_wq; 73static struct workqueue_struct *md_misc_wq;
74 74
75static int remove_and_add_spares(struct mddev *mddev,
76 struct md_rdev *this);
77
75#define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); } 78#define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
76 79
77/* 80/*
@@ -1564,8 +1567,8 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
1564 sector, count, 1) == 0) 1567 sector, count, 1) == 0)
1565 return -EINVAL; 1568 return -EINVAL;
1566 } 1569 }
1567 } else if (sb->bblog_offset == 0) 1570 } else if (sb->bblog_offset != 0)
1568 rdev->badblocks.shift = -1; 1571 rdev->badblocks.shift = 0;
1569 1572
1570 if (!refdev) { 1573 if (!refdev) {
1571 ret = 1; 1574 ret = 1;
@@ -2411,6 +2414,11 @@ static void md_update_sb(struct mddev * mddev, int force_change)
2411 int nospares = 0; 2414 int nospares = 0;
2412 int any_badblocks_changed = 0; 2415 int any_badblocks_changed = 0;
2413 2416
2417 if (mddev->ro) {
2418 if (force_change)
2419 set_bit(MD_CHANGE_DEVS, &mddev->flags);
2420 return;
2421 }
2414repeat: 2422repeat:
2415 /* First make sure individual recovery_offsets are correct */ 2423 /* First make sure individual recovery_offsets are correct */
2416 rdev_for_each(rdev, mddev) { 2424 rdev_for_each(rdev, mddev) {
@@ -2800,12 +2808,10 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len)
2800 /* personality does all needed checks */ 2808 /* personality does all needed checks */
2801 if (rdev->mddev->pers->hot_remove_disk == NULL) 2809 if (rdev->mddev->pers->hot_remove_disk == NULL)
2802 return -EINVAL; 2810 return -EINVAL;
2803 err = rdev->mddev->pers-> 2811 clear_bit(Blocked, &rdev->flags);
2804 hot_remove_disk(rdev->mddev, rdev); 2812 remove_and_add_spares(rdev->mddev, rdev);
2805 if (err) 2813 if (rdev->raid_disk >= 0)
2806 return err; 2814 return -EBUSY;
2807 sysfs_unlink_rdev(rdev->mddev, rdev);
2808 rdev->raid_disk = -1;
2809 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery); 2815 set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
2810 md_wakeup_thread(rdev->mddev->thread); 2816 md_wakeup_thread(rdev->mddev->thread);
2811 } else if (rdev->mddev->pers) { 2817 } else if (rdev->mddev->pers) {
@@ -3221,7 +3227,7 @@ int md_rdev_init(struct md_rdev *rdev)
3221 * be used - I wonder if that matters 3227 * be used - I wonder if that matters
3222 */ 3228 */
3223 rdev->badblocks.count = 0; 3229 rdev->badblocks.count = 0;
3224 rdev->badblocks.shift = 0; 3230 rdev->badblocks.shift = -1; /* disabled until explicitly enabled */
3225 rdev->badblocks.page = kmalloc(PAGE_SIZE, GFP_KERNEL); 3231 rdev->badblocks.page = kmalloc(PAGE_SIZE, GFP_KERNEL);
3226 seqlock_init(&rdev->badblocks.lock); 3232 seqlock_init(&rdev->badblocks.lock);
3227 if (rdev->badblocks.page == NULL) 3233 if (rdev->badblocks.page == NULL)
@@ -3293,9 +3299,6 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe
3293 goto abort_free; 3299 goto abort_free;
3294 } 3300 }
3295 } 3301 }
3296 if (super_format == -1)
3297 /* hot-add for 0.90, or non-persistent: so no badblocks */
3298 rdev->badblocks.shift = -1;
3299 3302
3300 return rdev; 3303 return rdev;
3301 3304
@@ -4225,8 +4228,6 @@ action_show(struct mddev *mddev, char *page)
4225 return sprintf(page, "%s\n", type); 4228 return sprintf(page, "%s\n", type);
4226} 4229}
4227 4230
4228static void reap_sync_thread(struct mddev *mddev);
4229
4230static ssize_t 4231static ssize_t
4231action_store(struct mddev *mddev, const char *page, size_t len) 4232action_store(struct mddev *mddev, const char *page, size_t len)
4232{ 4233{
@@ -4241,7 +4242,7 @@ action_store(struct mddev *mddev, const char *page, size_t len)
4241 if (cmd_match(page, "idle") || cmd_match(page, "frozen")) { 4242 if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
4242 if (mddev->sync_thread) { 4243 if (mddev->sync_thread) {
4243 set_bit(MD_RECOVERY_INTR, &mddev->recovery); 4244 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
4244 reap_sync_thread(mddev); 4245 md_reap_sync_thread(mddev);
4245 } 4246 }
4246 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || 4247 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
4247 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) 4248 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
@@ -5279,7 +5280,7 @@ static void __md_stop_writes(struct mddev *mddev)
5279 if (mddev->sync_thread) { 5280 if (mddev->sync_thread) {
5280 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); 5281 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5281 set_bit(MD_RECOVERY_INTR, &mddev->recovery); 5282 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5282 reap_sync_thread(mddev); 5283 md_reap_sync_thread(mddev);
5283 } 5284 }
5284 5285
5285 del_timer_sync(&mddev->safemode_timer); 5286 del_timer_sync(&mddev->safemode_timer);
@@ -5287,7 +5288,8 @@ static void __md_stop_writes(struct mddev *mddev)
5287 bitmap_flush(mddev); 5288 bitmap_flush(mddev);
5288 md_super_wait(mddev); 5289 md_super_wait(mddev);
5289 5290
5290 if (!mddev->in_sync || mddev->flags) { 5291 if (mddev->ro == 0 &&
5292 (!mddev->in_sync || mddev->flags)) {
5291 /* mark array as shutdown cleanly */ 5293 /* mark array as shutdown cleanly */
5292 mddev->in_sync = 1; 5294 mddev->in_sync = 1;
5293 md_update_sb(mddev, 1); 5295 md_update_sb(mddev, 1);
@@ -5810,7 +5812,7 @@ static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info)
5810 else 5812 else
5811 sysfs_notify_dirent_safe(rdev->sysfs_state); 5813 sysfs_notify_dirent_safe(rdev->sysfs_state);
5812 5814
5813 md_update_sb(mddev, 1); 5815 set_bit(MD_CHANGE_DEVS, &mddev->flags);
5814 if (mddev->degraded) 5816 if (mddev->degraded)
5815 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery); 5817 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
5816 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 5818 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
@@ -5877,6 +5879,9 @@ static int hot_remove_disk(struct mddev * mddev, dev_t dev)
5877 if (!rdev) 5879 if (!rdev)
5878 return -ENXIO; 5880 return -ENXIO;
5879 5881
5882 clear_bit(Blocked, &rdev->flags);
5883 remove_and_add_spares(mddev, rdev);
5884
5880 if (rdev->raid_disk >= 0) 5885 if (rdev->raid_disk >= 0)
5881 goto busy; 5886 goto busy;
5882 5887
@@ -6490,6 +6495,28 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
6490 err = md_set_readonly(mddev, bdev); 6495 err = md_set_readonly(mddev, bdev);
6491 goto done_unlock; 6496 goto done_unlock;
6492 6497
6498 case HOT_REMOVE_DISK:
6499 err = hot_remove_disk(mddev, new_decode_dev(arg));
6500 goto done_unlock;
6501
6502 case ADD_NEW_DISK:
6503 /* We can support ADD_NEW_DISK on read-only arrays
6504 * only if we are re-adding a preexisting device.
6505 * So require mddev->pers and MD_DISK_SYNC.
6506 */
6507 if (mddev->pers) {
6508 mdu_disk_info_t info;
6509 if (copy_from_user(&info, argp, sizeof(info)))
6510 err = -EFAULT;
6511 else if (!(info.state & (1<<MD_DISK_SYNC)))
6512 /* Need to clear read-only for this */
6513 break;
6514 else
6515 err = add_new_disk(mddev, &info);
6516 goto done_unlock;
6517 }
6518 break;
6519
6493 case BLKROSET: 6520 case BLKROSET:
6494 if (get_user(ro, (int __user *)(arg))) { 6521 if (get_user(ro, (int __user *)(arg))) {
6495 err = -EFAULT; 6522 err = -EFAULT;
@@ -6560,10 +6587,6 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
6560 goto done_unlock; 6587 goto done_unlock;
6561 } 6588 }
6562 6589
6563 case HOT_REMOVE_DISK:
6564 err = hot_remove_disk(mddev, new_decode_dev(arg));
6565 goto done_unlock;
6566
6567 case HOT_ADD_DISK: 6590 case HOT_ADD_DISK:
6568 err = hot_add_disk(mddev, new_decode_dev(arg)); 6591 err = hot_add_disk(mddev, new_decode_dev(arg));
6569 goto done_unlock; 6592 goto done_unlock;
@@ -7644,14 +7667,16 @@ void md_do_sync(struct md_thread *thread)
7644} 7667}
7645EXPORT_SYMBOL_GPL(md_do_sync); 7668EXPORT_SYMBOL_GPL(md_do_sync);
7646 7669
7647static int remove_and_add_spares(struct mddev *mddev) 7670static int remove_and_add_spares(struct mddev *mddev,
7671 struct md_rdev *this)
7648{ 7672{
7649 struct md_rdev *rdev; 7673 struct md_rdev *rdev;
7650 int spares = 0; 7674 int spares = 0;
7651 int removed = 0; 7675 int removed = 0;
7652 7676
7653 rdev_for_each(rdev, mddev) 7677 rdev_for_each(rdev, mddev)
7654 if (rdev->raid_disk >= 0 && 7678 if ((this == NULL || rdev == this) &&
7679 rdev->raid_disk >= 0 &&
7655 !test_bit(Blocked, &rdev->flags) && 7680 !test_bit(Blocked, &rdev->flags) &&
7656 (test_bit(Faulty, &rdev->flags) || 7681 (test_bit(Faulty, &rdev->flags) ||
7657 ! test_bit(In_sync, &rdev->flags)) && 7682 ! test_bit(In_sync, &rdev->flags)) &&
@@ -7666,74 +7691,52 @@ static int remove_and_add_spares(struct mddev *mddev)
7666 if (removed && mddev->kobj.sd) 7691 if (removed && mddev->kobj.sd)
7667 sysfs_notify(&mddev->kobj, NULL, "degraded"); 7692 sysfs_notify(&mddev->kobj, NULL, "degraded");
7668 7693
7694 if (this)
7695 goto no_add;
7696
7669 rdev_for_each(rdev, mddev) { 7697 rdev_for_each(rdev, mddev) {
7670 if (rdev->raid_disk >= 0 && 7698 if (rdev->raid_disk >= 0 &&
7671 !test_bit(In_sync, &rdev->flags) && 7699 !test_bit(In_sync, &rdev->flags) &&
7672 !test_bit(Faulty, &rdev->flags)) 7700 !test_bit(Faulty, &rdev->flags))
7673 spares++; 7701 spares++;
7674 if (rdev->raid_disk < 0 7702 if (rdev->raid_disk >= 0)
7675 && !test_bit(Faulty, &rdev->flags)) { 7703 continue;
7676 rdev->recovery_offset = 0; 7704 if (test_bit(Faulty, &rdev->flags))
7677 if (mddev->pers-> 7705 continue;
7678 hot_add_disk(mddev, rdev) == 0) { 7706 if (mddev->ro &&
7679 if (sysfs_link_rdev(mddev, rdev)) 7707 rdev->saved_raid_disk < 0)
7680 /* failure here is OK */; 7708 continue;
7681 spares++; 7709
7682 md_new_event(mddev); 7710 rdev->recovery_offset = 0;
7683 set_bit(MD_CHANGE_DEVS, &mddev->flags); 7711 if (rdev->saved_raid_disk >= 0 && mddev->in_sync) {
7684 } 7712 spin_lock_irq(&mddev->write_lock);
7713 if (mddev->in_sync)
7714 /* OK, this device, which is in_sync,
7715 * will definitely be noticed before
7716 * the next write, so recovery isn't
7717 * needed.
7718 */
7719 rdev->recovery_offset = mddev->recovery_cp;
7720 spin_unlock_irq(&mddev->write_lock);
7721 }
7722 if (mddev->ro && rdev->recovery_offset != MaxSector)
7723 /* not safe to add this disk now */
7724 continue;
7725 if (mddev->pers->
7726 hot_add_disk(mddev, rdev) == 0) {
7727 if (sysfs_link_rdev(mddev, rdev))
7728 /* failure here is OK */;
7729 spares++;
7730 md_new_event(mddev);
7731 set_bit(MD_CHANGE_DEVS, &mddev->flags);
7685 } 7732 }
7686 } 7733 }
7734no_add:
7687 if (removed) 7735 if (removed)
7688 set_bit(MD_CHANGE_DEVS, &mddev->flags); 7736 set_bit(MD_CHANGE_DEVS, &mddev->flags);
7689 return spares; 7737 return spares;
7690} 7738}
7691 7739
7692static void reap_sync_thread(struct mddev *mddev)
7693{
7694 struct md_rdev *rdev;
7695
7696 /* resync has finished, collect result */
7697 md_unregister_thread(&mddev->sync_thread);
7698 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
7699 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
7700 /* success...*/
7701 /* activate any spares */
7702 if (mddev->pers->spare_active(mddev)) {
7703 sysfs_notify(&mddev->kobj, NULL,
7704 "degraded");
7705 set_bit(MD_CHANGE_DEVS, &mddev->flags);
7706 }
7707 }
7708 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
7709 mddev->pers->finish_reshape)
7710 mddev->pers->finish_reshape(mddev);
7711
7712 /* If array is no-longer degraded, then any saved_raid_disk
7713 * information must be scrapped. Also if any device is now
7714 * In_sync we must scrap the saved_raid_disk for that device
7715 * so the superblock for an incrementally recovered device
7716 * is written out.
7717 */
7718 rdev_for_each(rdev, mddev)
7719 if (!mddev->degraded ||
7720 test_bit(In_sync, &rdev->flags))
7721 rdev->saved_raid_disk = -1;
7722
7723 md_update_sb(mddev, 1);
7724 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
7725 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
7726 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
7727 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
7728 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
7729 /* flag recovery needed just to double check */
7730 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7731 sysfs_notify_dirent_safe(mddev->sysfs_action);
7732 md_new_event(mddev);
7733 if (mddev->event_work.func)
7734 queue_work(md_misc_wq, &mddev->event_work);
7735}
7736
7737/* 7740/*
7738 * This routine is regularly called by all per-raid-array threads to 7741 * This routine is regularly called by all per-raid-array threads to
7739 * deal with generic issues like resync and super-block update. 7742 * deal with generic issues like resync and super-block update.
@@ -7789,22 +7792,16 @@ void md_check_recovery(struct mddev *mddev)
7789 int spares = 0; 7792 int spares = 0;
7790 7793
7791 if (mddev->ro) { 7794 if (mddev->ro) {
7792 /* Only thing we do on a ro array is remove 7795 /* On a read-only array we can:
7793 * failed devices. 7796 * - remove failed devices
7797 * - add already-in_sync devices if the array itself
7798 * is in-sync.
7799 * As we only add devices that are already in-sync,
7800 * we can activate the spares immediately.
7794 */ 7801 */
7795 struct md_rdev *rdev;
7796 rdev_for_each(rdev, mddev)
7797 if (rdev->raid_disk >= 0 &&
7798 !test_bit(Blocked, &rdev->flags) &&
7799 test_bit(Faulty, &rdev->flags) &&
7800 atomic_read(&rdev->nr_pending)==0) {
7801 if (mddev->pers->hot_remove_disk(
7802 mddev, rdev) == 0) {
7803 sysfs_unlink_rdev(mddev, rdev);
7804 rdev->raid_disk = -1;
7805 }
7806 }
7807 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 7802 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7803 remove_and_add_spares(mddev, NULL);
7804 mddev->pers->spare_active(mddev);
7808 goto unlock; 7805 goto unlock;
7809 } 7806 }
7810 7807
@@ -7836,7 +7833,7 @@ void md_check_recovery(struct mddev *mddev)
7836 goto unlock; 7833 goto unlock;
7837 } 7834 }
7838 if (mddev->sync_thread) { 7835 if (mddev->sync_thread) {
7839 reap_sync_thread(mddev); 7836 md_reap_sync_thread(mddev);
7840 goto unlock; 7837 goto unlock;
7841 } 7838 }
7842 /* Set RUNNING before clearing NEEDED to avoid 7839 /* Set RUNNING before clearing NEEDED to avoid
@@ -7867,7 +7864,7 @@ void md_check_recovery(struct mddev *mddev)
7867 goto unlock; 7864 goto unlock;
7868 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); 7865 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
7869 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery); 7866 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
7870 } else if ((spares = remove_and_add_spares(mddev))) { 7867 } else if ((spares = remove_and_add_spares(mddev, NULL))) {
7871 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); 7868 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
7872 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); 7869 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
7873 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); 7870 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
@@ -7917,6 +7914,51 @@ void md_check_recovery(struct mddev *mddev)
7917 } 7914 }
7918} 7915}
7919 7916
7917void md_reap_sync_thread(struct mddev *mddev)
7918{
7919 struct md_rdev *rdev;
7920
7921 /* resync has finished, collect result */
7922 md_unregister_thread(&mddev->sync_thread);
7923 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
7924 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
7925 /* success...*/
7926 /* activate any spares */
7927 if (mddev->pers->spare_active(mddev)) {
7928 sysfs_notify(&mddev->kobj, NULL,
7929 "degraded");
7930 set_bit(MD_CHANGE_DEVS, &mddev->flags);
7931 }
7932 }
7933 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
7934 mddev->pers->finish_reshape)
7935 mddev->pers->finish_reshape(mddev);
7936
7937 /* If array is no-longer degraded, then any saved_raid_disk
7938 * information must be scrapped. Also if any device is now
7939 * In_sync we must scrap the saved_raid_disk for that device
7940 * so the superblock for an incrementally recovered device
7941 * is written out.
7942 */
7943 rdev_for_each(rdev, mddev)
7944 if (!mddev->degraded ||
7945 test_bit(In_sync, &rdev->flags))
7946 rdev->saved_raid_disk = -1;
7947
7948 md_update_sb(mddev, 1);
7949 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
7950 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
7951 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
7952 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
7953 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
7954 /* flag recovery needed just to double check */
7955 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
7956 sysfs_notify_dirent_safe(mddev->sysfs_action);
7957 md_new_event(mddev);
7958 if (mddev->event_work.func)
7959 queue_work(md_misc_wq, &mddev->event_work);
7960}
7961
7920void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev) 7962void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev)
7921{ 7963{
7922 sysfs_notify_dirent_safe(rdev->sysfs_state); 7964 sysfs_notify_dirent_safe(rdev->sysfs_state);
@@ -8642,6 +8684,7 @@ EXPORT_SYMBOL(md_register_thread);
8642EXPORT_SYMBOL(md_unregister_thread); 8684EXPORT_SYMBOL(md_unregister_thread);
8643EXPORT_SYMBOL(md_wakeup_thread); 8685EXPORT_SYMBOL(md_wakeup_thread);
8644EXPORT_SYMBOL(md_check_recovery); 8686EXPORT_SYMBOL(md_check_recovery);
8687EXPORT_SYMBOL(md_reap_sync_thread);
8645MODULE_LICENSE("GPL"); 8688MODULE_LICENSE("GPL");
8646MODULE_DESCRIPTION("MD RAID framework"); 8689MODULE_DESCRIPTION("MD RAID framework");
8647MODULE_ALIAS("md"); 8690MODULE_ALIAS("md");
diff --git a/drivers/md/md.h b/drivers/md/md.h
index d90fb1a879e1..653f992b687a 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -567,6 +567,7 @@ extern struct md_thread *md_register_thread(
567extern void md_unregister_thread(struct md_thread **threadp); 567extern void md_unregister_thread(struct md_thread **threadp);
568extern void md_wakeup_thread(struct md_thread *thread); 568extern void md_wakeup_thread(struct md_thread *thread);
569extern void md_check_recovery(struct mddev *mddev); 569extern void md_check_recovery(struct mddev *mddev);
570extern void md_reap_sync_thread(struct mddev *mddev);
570extern void md_write_start(struct mddev *mddev, struct bio *bi); 571extern void md_write_start(struct mddev *mddev, struct bio *bi);
571extern void md_write_end(struct mddev *mddev); 572extern void md_write_end(struct mddev *mddev);
572extern void md_done_sync(struct mddev *mddev, int blocks, int ok); 573extern void md_done_sync(struct mddev *mddev, int blocks, int ok);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index fd86b372692d..851023e2ba5d 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -981,7 +981,12 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
981 while (bio) { /* submit pending writes */ 981 while (bio) { /* submit pending writes */
982 struct bio *next = bio->bi_next; 982 struct bio *next = bio->bi_next;
983 bio->bi_next = NULL; 983 bio->bi_next = NULL;
984 generic_make_request(bio); 984 if (unlikely((bio->bi_rw & REQ_DISCARD) &&
985 !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
986 /* Just ignore it */
987 bio_endio(bio, 0);
988 else
989 generic_make_request(bio);
985 bio = next; 990 bio = next;
986 } 991 }
987 kfree(plug); 992 kfree(plug);
@@ -2901,6 +2906,7 @@ static int stop(struct mddev *mddev)
2901 if (conf->r1bio_pool) 2906 if (conf->r1bio_pool)
2902 mempool_destroy(conf->r1bio_pool); 2907 mempool_destroy(conf->r1bio_pool);
2903 kfree(conf->mirrors); 2908 kfree(conf->mirrors);
2909 safe_put_page(conf->tmppage);
2904 kfree(conf->poolinfo); 2910 kfree(conf->poolinfo);
2905 kfree(conf); 2911 kfree(conf);
2906 mddev->private = NULL; 2912 mddev->private = NULL;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 77b562d18a90..018741ba9310 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1133,7 +1133,12 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
1133 while (bio) { /* submit pending writes */ 1133 while (bio) { /* submit pending writes */
1134 struct bio *next = bio->bi_next; 1134 struct bio *next = bio->bi_next;
1135 bio->bi_next = NULL; 1135 bio->bi_next = NULL;
1136 generic_make_request(bio); 1136 if (unlikely((bio->bi_rw & REQ_DISCARD) &&
1137 !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
1138 /* Just ignore it */
1139 bio_endio(bio, 0);
1140 else
1141 generic_make_request(bio);
1137 bio = next; 1142 bio = next;
1138 } 1143 }
1139 kfree(plug); 1144 kfree(plug);
@@ -2913,6 +2918,22 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
2913 if (init_resync(conf)) 2918 if (init_resync(conf))
2914 return 0; 2919 return 0;
2915 2920
2921 /*
2922 * Allow skipping a full rebuild for incremental assembly
2923 * of a clean array, like RAID1 does.
2924 */
2925 if (mddev->bitmap == NULL &&
2926 mddev->recovery_cp == MaxSector &&
2927 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
2928 conf->fullsync == 0) {
2929 *skipped = 1;
2930 max_sector = mddev->dev_sectors;
2931 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
2932 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
2933 max_sector = mddev->resync_max_sectors;
2934 return max_sector - sector_nr;
2935 }
2936
2916 skipped: 2937 skipped:
2917 max_sector = mddev->dev_sectors; 2938 max_sector = mddev->dev_sectors;
2918 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) || 2939 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
@@ -3810,6 +3831,7 @@ static int stop(struct mddev *mddev)
3810 3831
3811 if (conf->r10bio_pool) 3832 if (conf->r10bio_pool)
3812 mempool_destroy(conf->r10bio_pool); 3833 mempool_destroy(conf->r10bio_pool);
3834 safe_put_page(conf->tmppage);
3813 kfree(conf->mirrors); 3835 kfree(conf->mirrors);
3814 kfree(conf); 3836 kfree(conf);
3815 mddev->private = NULL; 3837 mddev->private = NULL;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index f4e87bfc7567..4a7be455d6d8 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1887,8 +1887,15 @@ static void raid5_end_write_request(struct bio *bi, int error)
1887 &rdev->mddev->recovery); 1887 &rdev->mddev->recovery);
1888 } else if (is_badblock(rdev, sh->sector, 1888 } else if (is_badblock(rdev, sh->sector,
1889 STRIPE_SECTORS, 1889 STRIPE_SECTORS,
1890 &first_bad, &bad_sectors)) 1890 &first_bad, &bad_sectors)) {
1891 set_bit(R5_MadeGood, &sh->dev[i].flags); 1891 set_bit(R5_MadeGood, &sh->dev[i].flags);
1892 if (test_bit(R5_ReadError, &sh->dev[i].flags))
1893 /* That was a successful write so make
1894 * sure it looks like we already did
1895 * a re-write.
1896 */
1897 set_bit(R5_ReWrite, &sh->dev[i].flags);
1898 }
1892 } 1899 }
1893 rdev_dec_pending(rdev, conf->mddev); 1900 rdev_dec_pending(rdev, conf->mddev);
1894 1901
@@ -4672,9 +4679,10 @@ static inline sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int
4672 *skipped = 1; 4679 *skipped = 1;
4673 return rv; 4680 return rv;
4674 } 4681 }
4675 if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) && 4682 if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
4676 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) && 4683 !conf->fullsync &&
4677 !conf->fullsync && sync_blocks >= STRIPE_SECTORS) { 4684 !bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) &&
4685 sync_blocks >= STRIPE_SECTORS) {
4678 /* we can skip this block, and probably more */ 4686 /* we can skip this block, and probably more */
4679 sync_blocks /= STRIPE_SECTORS; 4687 sync_blocks /= STRIPE_SECTORS;
4680 *skipped = 1; 4688 *skipped = 1;