-rw-r--r--  Documentation/device-mapper/dm-raid.txt |  2
-rw-r--r--  Documentation/md.txt                    | 13
-rw-r--r--  drivers/md/bitmap.c                     |  8
-rw-r--r--  drivers/md/dm-raid.c                    | 76
-rw-r--r--  drivers/md/md.c                         | 53
-rw-r--r--  drivers/md/md.h                         |  8
-rw-r--r--  drivers/md/raid0.c                      |  1
-rw-r--r--  drivers/md/raid1.c                      |  7
-rw-r--r--  drivers/md/raid10.c                     | 83
-rw-r--r--  drivers/md/raid5.c                      |  6
10 files changed, 187 insertions, 70 deletions
diff --git a/Documentation/device-mapper/dm-raid.txt b/Documentation/device-mapper/dm-raid.txt
index e9192283e5a5..ef8ba9fa58c4 100644
--- a/Documentation/device-mapper/dm-raid.txt
+++ b/Documentation/device-mapper/dm-raid.txt
@@ -222,3 +222,5 @@ Version History
 1.4.2	Add RAID10 "far" and "offset" algorithm support.
 1.5.0	Add message interface to allow manipulation of the sync_action.
 	New status (STATUSTYPE_INFO) fields: sync_action and mismatch_cnt.
+1.5.1	Add ability to restore transiently failed devices on resume.
+1.5.2	'mismatch_cnt' is zero unless [last_]sync_action is "check".
diff --git a/Documentation/md.txt b/Documentation/md.txt
index e0ddd327632d..fbb2fcbf16b6 100644
--- a/Documentation/md.txt
+++ b/Documentation/md.txt
@@ -566,13 +566,6 @@ also have
      when it reaches the current sync_max (below) and possibly at
      other times.
 
-  sync_max
-     This is a number of sectors at which point a resync/recovery
-     process will pause.  When a resync is active, the value can
-     only ever be increased, never decreased.  The value of 'max'
-     effectively disables the limit.
-
-
   sync_speed
      This shows the current actual speed, in K/sec, of the current
      sync_action.  It is averaged over the last 30 seconds.
@@ -593,6 +586,12 @@ also have
      that number to reach sync_max.  Then you can either increase
      "sync_max", or can write 'idle' to "sync_action".
 
+     The value of 'max' for "sync_max" effectively disables the limit.
+     When a resync is active, the value can only ever be increased,
+     never decreased.
+     The value of '0' is the minimum for "sync_min".
+
+
 
 Each active md device may also have attributes specific to the
 personality module that manages it.
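
The sync_min/sync_max protocol described above is easy to drive from
userspace. A minimal sketch in C, assuming an array at /sys/block/md0
(path and values are illustrative only, not part of this patch):

#include <stdio.h>

/* Write one value to an md sysfs attribute of md0, e.g. "sync_max". */
static int md_attr_write(const char *attr, const char *val)
{
	char path[256];
	FILE *f;

	snprintf(path, sizeof(path), "/sys/block/md0/md/%s", attr);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fputs(val, f);
	return fclose(f);
}

int main(void)
{
	/* Pause the resync once it reaches sector 1000000 ... */
	if (md_attr_write("sync_max", "1000000"))
		return 1;
	/* ... and later lift the limit again ('max' disables it). */
	return md_attr_write("sync_max", "max") ? 1 : 0;
}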
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 5a2c75499824..a7fd82133b12 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -2002,9 +2002,9 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
 	} else {
 		int rv;
 		if (buf[0] == '+')
-			rv = strict_strtoll(buf+1, 10, &offset);
+			rv = kstrtoll(buf+1, 10, &offset);
 		else
-			rv = strict_strtoll(buf, 10, &offset);
+			rv = kstrtoll(buf, 10, &offset);
 		if (rv)
 			return rv;
 		if (offset == 0)
@@ -2139,7 +2139,7 @@ static ssize_t
 backlog_store(struct mddev *mddev, const char *buf, size_t len)
 {
 	unsigned long backlog;
-	int rv = strict_strtoul(buf, 10, &backlog);
+	int rv = kstrtoul(buf, 10, &backlog);
 	if (rv)
 		return rv;
 	if (backlog > COUNTER_MAX)
@@ -2165,7 +2165,7 @@ chunksize_store(struct mddev *mddev, const char *buf, size_t len)
 	unsigned long csize;
 	if (mddev->bitmap)
 		return -EBUSY;
-	rv = strict_strtoul(buf, 10, &csize);
+	rv = kstrtoul(buf, 10, &csize);
 	if (rv)
 		return rv;
 	if (csize < 512 ||
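
The strict_strtoul() -> kstrtoul() changes in this file (and the
equivalent renames below) are mechanical; the contract stays strict:
no digits, overflow, or trailing junk other than a final newline is an
error. A rough userspace sketch of that contract (illustrative only;
the kernel's real implementation lives in lib/kstrtox.c):

#include <errno.h>
#include <stdlib.h>

static int sketch_kstrtoul(const char *s, unsigned int base,
			   unsigned long *res)
{
	char *end;

	errno = 0;
	*res = strtoul(s, &end, base);
	if (errno)
		return -errno;		/* e.g. -ERANGE on overflow */
	if (end == s)
		return -EINVAL;		/* no digits at all */
	if (*end == '\n')
		end++;			/* one trailing newline is OK */
	if (*end)
		return -EINVAL;		/* anything else is junk */
	return 0;
}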
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 1d3fe1a40a9b..4880b69e2e9e 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -380,7 +380,7 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size)
 static int validate_raid_redundancy(struct raid_set *rs)
 {
 	unsigned i, rebuild_cnt = 0;
-	unsigned rebuilds_per_group, copies, d;
+	unsigned rebuilds_per_group = 0, copies, d;
 	unsigned group_size, last_group_start;
 
 	for (i = 0; i < rs->md.raid_disks; i++)
@@ -504,7 +504,7 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
 	 * First, parse the in-order required arguments
 	 * "chunk_size" is the only argument of this type.
 	 */
-	if ((strict_strtoul(argv[0], 10, &value) < 0)) {
+	if ((kstrtoul(argv[0], 10, &value) < 0)) {
 		rs->ti->error = "Bad chunk size";
 		return -EINVAL;
 	} else if (rs->raid_type->level == 1) {
@@ -585,7 +585,7 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
 			continue;
 		}
 
-		if (strict_strtoul(argv[i], 10, &value) < 0) {
+		if (kstrtoul(argv[i], 10, &value) < 0) {
 			rs->ti->error = "Bad numerical argument given in raid params";
 			return -EINVAL;
 		}
@@ -1181,7 +1181,7 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	argv++;
 
 	/* number of RAID parameters */
-	if (strict_strtoul(argv[0], 10, &num_raid_params) < 0) {
+	if (kstrtoul(argv[0], 10, &num_raid_params) < 0) {
 		ti->error = "Cannot understand number of RAID parameters";
 		return -EINVAL;
 	}
@@ -1194,7 +1194,7 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
 		return -EINVAL;
 	}
 
-	if ((strict_strtoul(argv[num_raid_params], 10, &num_raid_devs) < 0) ||
+	if ((kstrtoul(argv[num_raid_params], 10, &num_raid_devs) < 0) ||
 	    (num_raid_devs >= INT_MAX)) {
 		ti->error = "Cannot understand number of raid devices";
 		return -EINVAL;
@@ -1388,6 +1388,7 @@ static void raid_status(struct dm_target *ti, status_type_t type,
 		 * performing a "check" of the array.
 		 */
 		DMEMIT(" %llu",
+		       (strcmp(rs->md.last_sync_action, "check")) ? 0 :
 		       (unsigned long long)
 		       atomic64_read(&rs->md.resync_mismatches));
 		break;
@@ -1572,6 +1573,62 @@ static void raid_postsuspend(struct dm_target *ti)
 	mddev_suspend(&rs->md);
 }
 
+static void attempt_restore_of_faulty_devices(struct raid_set *rs)
+{
+	int i;
+	uint64_t failed_devices, cleared_failed_devices = 0;
+	unsigned long flags;
+	struct dm_raid_superblock *sb;
+	struct md_rdev *r;
+
+	for (i = 0; i < rs->md.raid_disks; i++) {
+		r = &rs->dev[i].rdev;
+		if (test_bit(Faulty, &r->flags) && r->sb_page &&
+		    sync_page_io(r, 0, r->sb_size, r->sb_page, READ, 1)) {
+			DMINFO("Faulty %s device #%d has readable super block."
+			       "  Attempting to revive it.",
+			       rs->raid_type->name, i);
+
+			/*
+			 * Faulty bit may be set, but sometimes the array can
+			 * be suspended before the personalities can respond
+			 * by removing the device from the array (i.e. calling
+			 * 'hot_remove_disk').  If they haven't yet removed
+			 * the failed device, its 'raid_disk' number will be
+			 * '>= 0' - meaning we must call this function
+			 * ourselves.
+			 */
+			if ((r->raid_disk >= 0) &&
+			    (r->mddev->pers->hot_remove_disk(r->mddev, r) != 0))
+				/* Failed to revive this device, try next */
+				continue;
+
+			r->raid_disk = i;
+			r->saved_raid_disk = i;
+			flags = r->flags;
+			clear_bit(Faulty, &r->flags);
+			clear_bit(WriteErrorSeen, &r->flags);
+			clear_bit(In_sync, &r->flags);
+			if (r->mddev->pers->hot_add_disk(r->mddev, r)) {
+				r->raid_disk = -1;
+				r->saved_raid_disk = -1;
+				r->flags = flags;
+			} else {
+				r->recovery_offset = 0;
+				cleared_failed_devices |= 1 << i;
+			}
+		}
+	}
+	if (cleared_failed_devices) {
+		rdev_for_each(r, &rs->md) {
+			sb = page_address(r->sb_page);
+			failed_devices = le64_to_cpu(sb->failed_devices);
+			failed_devices &= ~cleared_failed_devices;
+			sb->failed_devices = cpu_to_le64(failed_devices);
+		}
+	}
+}
+
 static void raid_resume(struct dm_target *ti)
 {
 	struct raid_set *rs = ti->private;
@@ -1580,6 +1637,13 @@ static void raid_resume(struct dm_target *ti)
 	if (!rs->bitmap_loaded) {
 		bitmap_load(&rs->md);
 		rs->bitmap_loaded = 1;
+	} else {
+		/*
+		 * A secondary resume while the device is active.
+		 * Take this opportunity to check whether any failed
+		 * devices are reachable again.
+		 */
+		attempt_restore_of_faulty_devices(rs);
 	}
 
 	clear_bit(MD_RECOVERY_FROZEN, &rs->md.recovery);
@@ -1588,7 +1652,7 @@ static void raid_resume(struct dm_target *ti)
 
 static struct target_type raid_target = {
 	.name = "raid",
-	.version = {1, 5, 0},
+	.version = {1, 5, 2},
 	.module = THIS_MODULE,
 	.ctr = raid_ctr,
 	.dtr = raid_dtr,
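
The superblock update at the end of attempt_restore_of_faulty_devices()
is plain bitmask arithmetic: device i is bit i of the 64-bit
failed_devices field, so a revived device gets its bit cleared in every
member's superblock. A standalone illustration with hypothetical
values (not kernel code):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t failed_devices = (1ULL << 2) | (1ULL << 5); /* devs 2,5 failed */
	uint64_t cleared_failed_devices = 1ULL << 5;	     /* dev 5 revived */

	failed_devices &= ~cleared_failed_devices;
	printf("failed mask now: %#llx\n",	/* 0x4: only dev 2 left */
	       (unsigned long long)failed_devices);
	return 0;
}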
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 9b82377a833b..dddc87bcf64a 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -521,6 +521,7 @@ void mddev_init(struct mddev *mddev)
 	init_waitqueue_head(&mddev->recovery_wait);
 	mddev->reshape_position = MaxSector;
 	mddev->reshape_backwards = 0;
+	mddev->last_sync_action = "none";
 	mddev->resync_min = 0;
 	mddev->resync_max = MaxSector;
 	mddev->level = LEVEL_NONE;
@@ -2867,7 +2868,7 @@ static ssize_t
 offset_store(struct md_rdev *rdev, const char *buf, size_t len)
 {
 	unsigned long long offset;
-	if (strict_strtoull(buf, 10, &offset) < 0)
+	if (kstrtoull(buf, 10, &offset) < 0)
 		return -EINVAL;
 	if (rdev->mddev->pers && rdev->raid_disk >= 0)
 		return -EBUSY;
@@ -2895,7 +2896,7 @@ static ssize_t new_offset_store(struct md_rdev *rdev,
 	unsigned long long new_offset;
 	struct mddev *mddev = rdev->mddev;
 
-	if (strict_strtoull(buf, 10, &new_offset) < 0)
+	if (kstrtoull(buf, 10, &new_offset) < 0)
 		return -EINVAL;
 
 	if (mddev->sync_thread)
@@ -2961,7 +2962,7 @@ static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
 	unsigned long long blocks;
 	sector_t new;
 
-	if (strict_strtoull(buf, 10, &blocks) < 0)
+	if (kstrtoull(buf, 10, &blocks) < 0)
 		return -EINVAL;
 
 	if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
@@ -3069,7 +3070,7 @@ static ssize_t recovery_start_store(struct md_rdev *rdev, const char *buf, size_
 
 	if (cmd_match(buf, "none"))
 		recovery_start = MaxSector;
-	else if (strict_strtoull(buf, 10, &recovery_start))
+	else if (kstrtoull(buf, 10, &recovery_start))
 		return -EINVAL;
 
 	if (rdev->mddev->pers &&
@@ -3497,7 +3498,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
 	if (clevel[len-1] == '\n')
 		len--;
 	clevel[len] = 0;
-	if (strict_strtol(clevel, 10, &level))
+	if (kstrtol(clevel, 10, &level))
 		level = LEVEL_NONE;
 
 	if (request_module("md-%s", clevel) != 0)
@@ -4272,6 +4273,17 @@ action_store(struct mddev *mddev, const char *page, size_t len)
 	return len;
 }
 
+static struct md_sysfs_entry md_scan_mode =
+__ATTR(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
+
+static ssize_t
+last_sync_action_show(struct mddev *mddev, char *page)
+{
+	return sprintf(page, "%s\n", mddev->last_sync_action);
+}
+
+static struct md_sysfs_entry md_last_scan_mode = __ATTR_RO(last_sync_action);
+
 static ssize_t
 mismatch_cnt_show(struct mddev *mddev, char *page)
 {
@@ -4280,10 +4292,6 @@ mismatch_cnt_show(struct mddev *mddev, char *page)
 		atomic64_read(&mddev->resync_mismatches));
 }
 
-static struct md_sysfs_entry md_scan_mode =
-__ATTR(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
-
-
 static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt);
 
 static ssize_t
@@ -4356,7 +4364,7 @@ sync_force_parallel_store(struct mddev *mddev, const char *buf, size_t len)
 {
 	long n;
 
-	if (strict_strtol(buf, 10, &n))
+	if (kstrtol(buf, 10, &n))
 		return -EINVAL;
 
 	if (n != 0 && n != 1)
@@ -4424,7 +4432,7 @@ static ssize_t
 min_sync_store(struct mddev *mddev, const char *buf, size_t len)
 {
 	unsigned long long min;
-	if (strict_strtoull(buf, 10, &min))
+	if (kstrtoull(buf, 10, &min))
 		return -EINVAL;
 	if (min > mddev->resync_max)
 		return -EINVAL;
@@ -4461,7 +4469,7 @@ max_sync_store(struct mddev *mddev, const char *buf, size_t len)
 		mddev->resync_max = MaxSector;
 	else {
 		unsigned long long max;
-		if (strict_strtoull(buf, 10, &max))
+		if (kstrtoull(buf, 10, &max))
 			return -EINVAL;
 		if (max < mddev->resync_min)
 			return -EINVAL;
@@ -4686,6 +4694,7 @@ static struct attribute *md_default_attrs[] = {
 
 static struct attribute *md_redundancy_attrs[] = {
 	&md_scan_mode.attr,
+	&md_last_scan_mode.attr,
 	&md_mismatches.attr,
 	&md_sync_min.attr,
 	&md_sync_max.attr,
@@ -6405,6 +6414,12 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
 		/* need to ensure md_delayed_delete() has completed */
 		flush_workqueue(md_misc_wq);
 
+	if (cmd == HOT_REMOVE_DISK)
+		/* need to ensure recovery thread has run */
+		wait_event_interruptible_timeout(mddev->sb_wait,
+						 !test_bit(MD_RECOVERY_NEEDED,
+							   &mddev->flags),
+						 msecs_to_jiffies(5000));
 	err = mddev_lock(mddev);
 	if (err) {
 		printk(KERN_INFO
@@ -7323,7 +7338,7 @@ void md_do_sync(struct md_thread *thread)
 	sector_t last_check;
 	int skipped = 0;
 	struct md_rdev *rdev;
-	char *desc;
+	char *desc, *action = NULL;
 	struct blk_plug plug;
 
 	/* just incase thread restarts... */
@@ -7333,17 +7348,21 @@ void md_do_sync(struct md_thread *thread)
 		return;
 
 	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
-		if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
+		if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
 			desc = "data-check";
-		else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
+			action = "check";
+		} else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
 			desc = "requested-resync";
-		else
+			action = "repair";
+		} else
 			desc = "resync";
 	} else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
 		desc = "reshape";
 	else
 		desc = "recovery";
 
+	mddev->last_sync_action = action ?: desc;
+
 	/* we overload curr_resync somewhat here.
 	 * 0 == not engaged in resync at all
 	 * 2 == checking that there is no conflict with another sync
@@ -7892,6 +7911,8 @@ void md_check_recovery(struct mddev *mddev)
 			md_new_event(mddev);
 		}
 	unlock:
+		wake_up(&mddev->sb_wait);
+
 		if (!mddev->sync_thread) {
 			clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
 			if (test_and_clear_bit(MD_RECOVERY_RECOVER,
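
Two notes on the md.c changes above. "action ?: desc" uses the GNU
omitted-middle-operand form of the conditional operator, equivalent to
"action ? action : desc": last_sync_action becomes "check" or "repair"
when one was requested, and the generic description otherwise. The new
read-only attribute is then trivial to consume from userspace; a
minimal sketch, assuming an array at /sys/block/md0 (illustrative, not
part of this patch):

#include <stdio.h>

int main(void)
{
	char buf[32];
	FILE *f = fopen("/sys/block/md0/md/last_sync_action", "r");

	if (!f)
		return 1;
	if (fgets(buf, sizeof(buf), f))
		printf("last sync action: %s", buf);	/* e.g. "check\n" */
	fclose(f);
	return 0;
}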
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 653f992b687a..20f02c0b5f2d 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -268,6 +268,14 @@ struct mddev {
 
 	struct md_thread		*thread;	/* management thread */
 	struct md_thread		*sync_thread;	/* doing resync or reconstruct */
+
+	/* 'last_sync_action' is initialized to "none".  It is set when a
+	 * sync operation (i.e "data-check", "requested-resync", "resync",
+	 * "recovery", or "reshape") is started.  It holds this value even
+	 * when the sync thread is "frozen" (interrupted) or "idle" (stopped
+	 * or finished).  It is overwritten when a new sync operation is begun.
+	 */
+	char				*last_sync_action;
 	sector_t			curr_resync;	/* last block scheduled */
 	/* As resync requests can complete out of order, we cannot easily track
 	 * how much resync has been completed. So we occasionally pause until
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index fcf65e512cf5..c4d420b7d2f4 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -597,6 +597,7 @@ static void *raid0_takeover_raid45(struct mddev *mddev)
 			       mdname(mddev));
 			return ERR_PTR(-EINVAL);
 		}
+		rdev->sectors = mddev->dev_sectors;
 	}
 
 	/* Set new parameters */
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 6e17f8181c4b..ec734588a1c6 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1519,8 +1519,9 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 		p = conf->mirrors+mirror;
 		if (!p->rdev) {
 
-			disk_stack_limits(mddev->gendisk, rdev->bdev,
-					  rdev->data_offset << 9);
+			if (mddev->gendisk)
+				disk_stack_limits(mddev->gendisk, rdev->bdev,
+						  rdev->data_offset << 9);
 
 			p->head_position = 0;
 			rdev->raid_disk = mirror;
@@ -1559,7 +1560,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 		clear_bit(Unmerged, &rdev->flags);
 	}
 	md_integrity_add_rdev(rdev, mddev);
-	if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
+	if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
 		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
 	print_conf(conf);
 	return err;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 6ddae2501b9a..cd066b63bdaf 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -97,7 +97,7 @@ static int max_queued_requests = 1024;
 
 static void allow_barrier(struct r10conf *conf);
 static void lower_barrier(struct r10conf *conf);
-static int enough(struct r10conf *conf, int ignore);
+static int _enough(struct r10conf *conf, int previous, int ignore);
 static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
 				int *skipped);
 static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio);
@@ -392,11 +392,9 @@ static void raid10_end_read_request(struct bio *bio, int error)
 		 * than fail the last device.  Here we redefine
 		 * "uptodate" to mean "Don't want to retry"
 		 */
-		unsigned long flags;
-		spin_lock_irqsave(&conf->device_lock, flags);
-		if (!enough(conf, rdev->raid_disk))
+		if (!_enough(conf, test_bit(R10BIO_Previous, &r10_bio->state),
+			     rdev->raid_disk))
 			uptodate = 1;
-		spin_unlock_irqrestore(&conf->device_lock, flags);
 	}
 	if (uptodate) {
 		raid_end_bio_io(r10_bio);
@@ -1632,37 +1630,58 @@ static void status(struct seq_file *seq, struct mddev *mddev)
  * Don't consider the device numbered 'ignore'
  * as we might be about to remove it.
  */
-static int _enough(struct r10conf *conf, struct geom *geo, int ignore)
+static int _enough(struct r10conf *conf, int previous, int ignore)
 {
 	int first = 0;
+	int has_enough = 0;
+	int disks, ncopies;
+	if (previous) {
+		disks = conf->prev.raid_disks;
+		ncopies = conf->prev.near_copies;
+	} else {
+		disks = conf->geo.raid_disks;
+		ncopies = conf->geo.near_copies;
+	}
 
+	rcu_read_lock();
 	do {
 		int n = conf->copies;
 		int cnt = 0;
 		int this = first;
 		while (n--) {
-			if (conf->mirrors[this].rdev &&
-			    this != ignore)
+			struct md_rdev *rdev;
+			if (this != ignore &&
+			    (rdev = rcu_dereference(conf->mirrors[this].rdev)) &&
+			    test_bit(In_sync, &rdev->flags))
 				cnt++;
-			this = (this+1) % geo->raid_disks;
+			this = (this+1) % disks;
 		}
 		if (cnt == 0)
-			return 0;
-		first = (first + geo->near_copies) % geo->raid_disks;
+			goto out;
+		first = (first + ncopies) % disks;
 	} while (first != 0);
-	return 1;
+	has_enough = 1;
+out:
+	rcu_read_unlock();
+	return has_enough;
 }
 
 static int enough(struct r10conf *conf, int ignore)
 {
-	return _enough(conf, &conf->geo, ignore) &&
-	       _enough(conf, &conf->prev, ignore);
+	/* when calling 'enough', both 'prev' and 'geo' must
+	 * be stable.
+	 * This is ensured if ->reconfig_mutex or ->device_lock
+	 * is held.
	 */
+	return _enough(conf, 0, ignore) &&
+	       _enough(conf, 1, ignore);
 }
 
 static void error(struct mddev *mddev, struct md_rdev *rdev)
 {
 	char b[BDEVNAME_SIZE];
 	struct r10conf *conf = mddev->private;
+	unsigned long flags;
 
 	/*
 	 * If it is not operational, then we have already marked it as dead
@@ -1670,18 +1689,18 @@ static void error(struct mddev *mddev, struct md_rdev *rdev)
 	 * next level up know.
 	 * else mark the drive as failed
 	 */
+	spin_lock_irqsave(&conf->device_lock, flags);
 	if (test_bit(In_sync, &rdev->flags)
-	    && !enough(conf, rdev->raid_disk))
+	    && !enough(conf, rdev->raid_disk)) {
 		/*
 		 * Don't fail the drive, just return an IO error.
 		 */
+		spin_unlock_irqrestore(&conf->device_lock, flags);
 		return;
+	}
 	if (test_and_clear_bit(In_sync, &rdev->flags)) {
-		unsigned long flags;
-		spin_lock_irqsave(&conf->device_lock, flags);
 		mddev->degraded++;
-		spin_unlock_irqrestore(&conf->device_lock, flags);
 		/*
 		 * if recovery is running, make sure it aborts.
 		 */
 		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
@@ -1689,6 +1708,7 @@ static void error(struct mddev *mddev, struct md_rdev *rdev)
 	set_bit(Blocked, &rdev->flags);
 	set_bit(Faulty, &rdev->flags);
 	set_bit(MD_CHANGE_DEVS, &mddev->flags);
+	spin_unlock_irqrestore(&conf->device_lock, flags);
 	printk(KERN_ALERT
 	       "md/raid10:%s: Disk failure on %s, disabling device.\n"
 	       "md/raid10:%s: Operation continuing on %d devices.\n",
@@ -1791,7 +1811,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 		 * very different from resync
 		 */
 		return -EBUSY;
-	if (rdev->saved_raid_disk < 0 && !_enough(conf, &conf->prev, -1))
+	if (rdev->saved_raid_disk < 0 && !_enough(conf, 1, -1))
 		return -EINVAL;
 
 	if (rdev->raid_disk >= 0)
@@ -1819,15 +1839,17 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 			set_bit(Replacement, &rdev->flags);
 			rdev->raid_disk = mirror;
 			err = 0;
-			disk_stack_limits(mddev->gendisk, rdev->bdev,
-					  rdev->data_offset << 9);
+			if (mddev->gendisk)
+				disk_stack_limits(mddev->gendisk, rdev->bdev,
+						  rdev->data_offset << 9);
 			conf->fullsync = 1;
 			rcu_assign_pointer(p->replacement, rdev);
 			break;
 		}
 
-		disk_stack_limits(mddev->gendisk, rdev->bdev,
-				  rdev->data_offset << 9);
+		if (mddev->gendisk)
+			disk_stack_limits(mddev->gendisk, rdev->bdev,
+					  rdev->data_offset << 9);
 
 		p->head_position = 0;
 		p->recovery_disabled = mddev->recovery_disabled - 1;
@@ -2909,14 +2931,13 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
 	 */
 	if (mddev->bitmap == NULL &&
 	    mddev->recovery_cp == MaxSector &&
+	    mddev->reshape_position == MaxSector &&
+	    !test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
 	    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
+	    !test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
 	    conf->fullsync == 0) {
 		*skipped = 1;
-		max_sector = mddev->dev_sectors;
-		if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
-		    test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
-			max_sector = mddev->resync_max_sectors;
-		return max_sector - sector_nr;
+		return mddev->dev_sectors - sector_nr;
 	}
 
  skipped:
@@ -3532,7 +3553,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
 
 	/* FIXME calc properly */
 	conf->mirrors = kzalloc(sizeof(struct raid10_info)*(mddev->raid_disks +
-							    max(0,mddev->delta_disks)),
+							    max(0,-mddev->delta_disks)),
 			       GFP_KERNEL);
 	if (!conf->mirrors)
 		goto out;
@@ -3691,7 +3712,7 @@ static int run(struct mddev *mddev)
 		    conf->geo.far_offset == 0)
 			goto out_free_conf;
 		if (conf->prev.far_copies != 1 &&
-		    conf->geo.far_offset == 0)
+		    conf->prev.far_offset == 0)
 			goto out_free_conf;
 	}
 
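
The rewritten _enough() still encodes the raid10 survival rule: copies
of a block live on 'copies' consecutive devices, groups advance by
near_copies, and the array is only viable if every such group retains
at least one In_sync member. A standalone sketch of that walk, with a
hypothetical in_sync[] array standing in for the rdev flags (not
kernel code):

#include <stdio.h>

static int enough_sketch(const int *in_sync, int disks, int ncopies,
			 int copies, int ignore)
{
	int first = 0;

	do {
		int n = copies, cnt = 0, this = first;

		while (n--) {
			if (this != ignore && in_sync[this])
				cnt++;
			this = (this + 1) % disks;
		}
		if (cnt == 0)
			return 0;	/* a whole copy set is gone */
		first = (first + ncopies) % disks;
	} while (first != 0);
	return 1;
}

int main(void)
{
	int in_sync[4] = { 1, 0, 1, 1 };	/* device 1 has failed */

	/* 4 disks, 2 near copies: sets (0,1) and (2,3) each keep a disk */
	printf("%d\n", enough_sketch(in_sync, 4, 2, 2, -1));	/* 1 */
	return 0;
}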
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 05e4a105b9c7..2bf094a587cb 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -4924,7 +4924,7 @@ raid5_store_stripe_cache_size(struct mddev *mddev, const char *page, size_t len)
 	if (!conf)
 		return -ENODEV;
 
-	if (strict_strtoul(page, 10, &new))
+	if (kstrtoul(page, 10, &new))
 		return -EINVAL;
 	err = raid5_set_cache_size(mddev, new);
 	if (err)
@@ -4957,7 +4957,7 @@ raid5_store_preread_threshold(struct mddev *mddev, const char *page, size_t len)
 	if (!conf)
 		return -ENODEV;
 
-	if (strict_strtoul(page, 10, &new))
+	if (kstrtoul(page, 10, &new))
 		return -EINVAL;
 	if (new > conf->max_nr_stripes)
 		return -EINVAL;
@@ -5914,7 +5914,7 @@ static int check_reshape(struct mddev *mddev)
 		return 0; /* nothing to do */
 	if (has_failed(conf))
 		return -EINVAL;
-	if (mddev->delta_disks < 0) {
+	if (mddev->delta_disks < 0 && mddev->reshape_position == MaxSector) {
 		/* We might be able to shrink, but the devices must
 		 * be made bigger first.
 		 * For raid6, 4 is the minimum size.