 Documentation/device-mapper/dm-raid.txt |  2
 Documentation/md.txt                    | 13
 drivers/md/bitmap.c                     |  8
 drivers/md/dm-raid.c                    | 76
 drivers/md/md.c                         | 53
 drivers/md/md.h                         |  8
 drivers/md/raid0.c                      |  1
 drivers/md/raid1.c                      |  7
 drivers/md/raid10.c                     | 83
 drivers/md/raid5.c                      |  6
 10 files changed, 187 insertions(+), 70 deletions(-)
diff --git a/Documentation/device-mapper/dm-raid.txt b/Documentation/device-mapper/dm-raid.txt
index e9192283e5a5..ef8ba9fa58c4 100644
--- a/Documentation/device-mapper/dm-raid.txt
+++ b/Documentation/device-mapper/dm-raid.txt
@@ -222,3 +222,5 @@ Version History
 1.4.2	Add RAID10 "far" and "offset" algorithm support.
 1.5.0	Add message interface to allow manipulation of the sync_action.
 	New status (STATUSTYPE_INFO) fields: sync_action and mismatch_cnt.
+1.5.1	Add ability to restore transiently failed devices on resume.
+1.5.2	'mismatch_cnt' is zero unless [last_]sync_action is "check".
diff --git a/Documentation/md.txt b/Documentation/md.txt
index e0ddd327632d..fbb2fcbf16b6 100644
--- a/Documentation/md.txt
+++ b/Documentation/md.txt
@@ -566,13 +566,6 @@ also have
      when it reaches the current sync_max (below) and possibly at
      other times.
 
-  sync_max
-     This is a number of sectors at which point a resync/recovery
-     process will pause.  When a resync is active, the value can
-     only ever be increased, never decreased.  The value of 'max'
-     effectively disables the limit.
-
-
   sync_speed
      This shows the current actual speed, in K/sec, of the current
      sync_action.  It is averaged over the last 30 seconds.
@@ -593,6 +586,12 @@ also have
      that number to reach sync_max.  Then you can either increase
      "sync_max", or can write 'idle' to "sync_action".
 
+     The value of 'max' for "sync_max" effectively disables the limit.
+     When a resync is active, the value can only ever be increased,
+     never decreased.
+     The value of '0' is the minimum for "sync_min".
+
+
 
 Each active md device may also have attributes specific to the
 personality module that manages it.
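
The sync_min/sync_max semantics documented above allow a scrub to be run in
bounded windows. A minimal userspace sketch of that usage (assuming an array
at md0 and the /sys/block/<dev>/md/ layout this file describes; not part of
this patch, error handling trimmed):

    /* Sketch: bound a "check" pass to the first 2097152 sectors of md0. */
    #include <stdio.h>

    static int write_md_attr(const char *attr, const char *val)
    {
        char path[128];
        FILE *f;

        snprintf(path, sizeof(path), "/sys/block/md0/md/%s", attr);
        f = fopen(path, "w");
        if (!f)
            return -1;
        fputs(val, f);
        return fclose(f);
    }

    int main(void)
    {
        write_md_attr("sync_min", "0");       /* '0' is the sync_min minimum */
        write_md_attr("sync_max", "2097152");
        write_md_attr("sync_action", "check");
        /* ...poll sync_completed, then raise sync_max (it may only be
         * increased while the resync is active) or write 'idle'... */
        return 0;
    }
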
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 5a2c75499824..a7fd82133b12 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -2002,9 +2002,9 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
 	} else {
 		int rv;
 		if (buf[0] == '+')
-			rv = strict_strtoll(buf+1, 10, &offset);
+			rv = kstrtoll(buf+1, 10, &offset);
 		else
-			rv = strict_strtoll(buf, 10, &offset);
+			rv = kstrtoll(buf, 10, &offset);
 		if (rv)
 			return rv;
 		if (offset == 0)
@@ -2139,7 +2139,7 @@ static ssize_t
 backlog_store(struct mddev *mddev, const char *buf, size_t len)
 {
 	unsigned long backlog;
-	int rv = strict_strtoul(buf, 10, &backlog);
+	int rv = kstrtoul(buf, 10, &backlog);
 	if (rv)
 		return rv;
 	if (backlog > COUNTER_MAX)
@@ -2165,7 +2165,7 @@ chunksize_store(struct mddev *mddev, const char *buf, size_t len)
 	unsigned long csize;
 	if (mddev->bitmap)
 		return -EBUSY;
-	rv = strict_strtoul(buf, 10, &csize);
+	rv = kstrtoul(buf, 10, &csize);
 	if (rv)
 		return rv;
 	if (csize < 512 ||
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 1d3fe1a40a9b..4880b69e2e9e 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -380,7 +380,7 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size)
 static int validate_raid_redundancy(struct raid_set *rs)
 {
 	unsigned i, rebuild_cnt = 0;
-	unsigned rebuilds_per_group, copies, d;
+	unsigned rebuilds_per_group = 0, copies, d;
 	unsigned group_size, last_group_start;
 
 	for (i = 0; i < rs->md.raid_disks; i++)
@@ -504,7 +504,7 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
 	 * First, parse the in-order required arguments
 	 * "chunk_size" is the only argument of this type.
 	 */
-	if ((strict_strtoul(argv[0], 10, &value) < 0)) {
+	if ((kstrtoul(argv[0], 10, &value) < 0)) {
 		rs->ti->error = "Bad chunk size";
 		return -EINVAL;
 	} else if (rs->raid_type->level == 1) {
@@ -585,7 +585,7 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
 			continue;
 		}
 
-		if (strict_strtoul(argv[i], 10, &value) < 0) {
+		if (kstrtoul(argv[i], 10, &value) < 0) {
 			rs->ti->error = "Bad numerical argument given in raid params";
 			return -EINVAL;
 		}
@@ -1181,7 +1181,7 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	argv++;
 
 	/* number of RAID parameters */
-	if (strict_strtoul(argv[0], 10, &num_raid_params) < 0) {
+	if (kstrtoul(argv[0], 10, &num_raid_params) < 0) {
 		ti->error = "Cannot understand number of RAID parameters";
 		return -EINVAL;
 	}
@@ -1194,7 +1194,7 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
 		return -EINVAL;
 	}
 
-	if ((strict_strtoul(argv[num_raid_params], 10, &num_raid_devs) < 0) ||
+	if ((kstrtoul(argv[num_raid_params], 10, &num_raid_devs) < 0) ||
 	    (num_raid_devs >= INT_MAX)) {
 		ti->error = "Cannot understand number of raid devices";
 		return -EINVAL;
@@ -1388,6 +1388,7 @@ static void raid_status(struct dm_target *ti, status_type_t type,
 		 * performing a "check" of the array.
 		 */
 		DMEMIT(" %llu",
+		       (strcmp(rs->md.last_sync_action, "check")) ? 0 :
 		       (unsigned long long)
 		       atomic64_read(&rs->md.resync_mismatches));
 		break;
@@ -1572,6 +1573,62 @@ static void raid_postsuspend(struct dm_target *ti)
 	mddev_suspend(&rs->md);
 }
 
+static void attempt_restore_of_faulty_devices(struct raid_set *rs)
+{
+	int i;
+	uint64_t failed_devices, cleared_failed_devices = 0;
+	unsigned long flags;
+	struct dm_raid_superblock *sb;
+	struct md_rdev *r;
+
+	for (i = 0; i < rs->md.raid_disks; i++) {
+		r = &rs->dev[i].rdev;
+		if (test_bit(Faulty, &r->flags) && r->sb_page &&
+		    sync_page_io(r, 0, r->sb_size, r->sb_page, READ, 1)) {
+			DMINFO("Faulty %s device #%d has readable super block."
+			       " Attempting to revive it.",
+			       rs->raid_type->name, i);
+
+			/*
+			 * Faulty bit may be set, but sometimes the array can
+			 * be suspended before the personalities can respond
+			 * by removing the device from the array (i.e. calling
+			 * 'hot_remove_disk').  If they haven't yet removed
+			 * the failed device, its 'raid_disk' number will be
+			 * '>= 0' - meaning we must call this function
+			 * ourselves.
+			 */
+			if ((r->raid_disk >= 0) &&
+			    (r->mddev->pers->hot_remove_disk(r->mddev, r) != 0))
+				/* Failed to revive this device, try next */
+				continue;
+
+			r->raid_disk = i;
+			r->saved_raid_disk = i;
+			flags = r->flags;
+			clear_bit(Faulty, &r->flags);
+			clear_bit(WriteErrorSeen, &r->flags);
+			clear_bit(In_sync, &r->flags);
+			if (r->mddev->pers->hot_add_disk(r->mddev, r)) {
+				r->raid_disk = -1;
+				r->saved_raid_disk = -1;
+				r->flags = flags;
+			} else {
+				r->recovery_offset = 0;
+				cleared_failed_devices |= 1 << i;
+			}
+		}
+	}
+	if (cleared_failed_devices) {
+		rdev_for_each(r, &rs->md) {
+			sb = page_address(r->sb_page);
+			failed_devices = le64_to_cpu(sb->failed_devices);
+			failed_devices &= ~cleared_failed_devices;
+			sb->failed_devices = cpu_to_le64(failed_devices);
+		}
+	}
+}
+
 static void raid_resume(struct dm_target *ti)
 {
 	struct raid_set *rs = ti->private;
@@ -1580,6 +1637,13 @@ static void raid_resume(struct dm_target *ti)
 	if (!rs->bitmap_loaded) {
 		bitmap_load(&rs->md);
 		rs->bitmap_loaded = 1;
+	} else {
+		/*
+		 * A secondary resume while the device is active.
+		 * Take this opportunity to check whether any failed
+		 * devices are reachable again.
+		 */
+		attempt_restore_of_faulty_devices(rs);
 	}
 
 	clear_bit(MD_RECOVERY_FROZEN, &rs->md.recovery);
@@ -1588,7 +1652,7 @@
 
 static struct target_type raid_target = {
 	.name = "raid",
-	.version = {1, 5, 0},
+	.version = {1, 5, 2},
 	.module = THIS_MODULE,
 	.ctr = raid_ctr,
 	.dtr = raid_dtr,
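
The superblock bookkeeping at the tail of attempt_restore_of_faulty_devices()
is a plain 64-bit mask clear, applied to every member's metadata. The same
pattern in isolation (hypothetical mask values, not dm-raid code):

    /* Sketch: drop revived devices out of a failed-device bitmask. */
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t failed_devices = 0x16;   /* devices 1, 2 and 4 marked failed */
        uint64_t cleared_failed_devices = 0;

        cleared_failed_devices |= 1 << 2; /* device #2 was revived */
        failed_devices &= ~cleared_failed_devices;
        printf("failed mask now 0x%llx\n",
               (unsigned long long)failed_devices); /* prints 0x12 */
        return 0;
    }
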
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 9b82377a833b..dddc87bcf64a 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -521,6 +521,7 @@ void mddev_init(struct mddev *mddev)
 	init_waitqueue_head(&mddev->recovery_wait);
 	mddev->reshape_position = MaxSector;
 	mddev->reshape_backwards = 0;
+	mddev->last_sync_action = "none";
 	mddev->resync_min = 0;
 	mddev->resync_max = MaxSector;
 	mddev->level = LEVEL_NONE;
@@ -2867,7 +2868,7 @@ static ssize_t
 offset_store(struct md_rdev *rdev, const char *buf, size_t len)
 {
 	unsigned long long offset;
-	if (strict_strtoull(buf, 10, &offset) < 0)
+	if (kstrtoull(buf, 10, &offset) < 0)
 		return -EINVAL;
 	if (rdev->mddev->pers && rdev->raid_disk >= 0)
 		return -EBUSY;
@@ -2895,7 +2896,7 @@ static ssize_t new_offset_store(struct md_rdev *rdev,
 	unsigned long long new_offset;
 	struct mddev *mddev = rdev->mddev;
 
-	if (strict_strtoull(buf, 10, &new_offset) < 0)
+	if (kstrtoull(buf, 10, &new_offset) < 0)
 		return -EINVAL;
 
 	if (mddev->sync_thread)
@@ -2961,7 +2962,7 @@ static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
 	unsigned long long blocks;
 	sector_t new;
 
-	if (strict_strtoull(buf, 10, &blocks) < 0)
+	if (kstrtoull(buf, 10, &blocks) < 0)
 		return -EINVAL;
 
 	if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
@@ -3069,7 +3070,7 @@ static ssize_t recovery_start_store(struct md_rdev *rdev, const char *buf, size_
 
 	if (cmd_match(buf, "none"))
 		recovery_start = MaxSector;
-	else if (strict_strtoull(buf, 10, &recovery_start))
+	else if (kstrtoull(buf, 10, &recovery_start))
 		return -EINVAL;
 
 	if (rdev->mddev->pers &&
@@ -3497,7 +3498,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
 	if (clevel[len-1] == '\n')
 		len--;
 	clevel[len] = 0;
-	if (strict_strtol(clevel, 10, &level))
+	if (kstrtol(clevel, 10, &level))
 		level = LEVEL_NONE;
 
 	if (request_module("md-%s", clevel) != 0)
@@ -4272,6 +4273,17 @@ action_store(struct mddev *mddev, const char *page, size_t len)
 	return len;
 }
 
+static struct md_sysfs_entry md_scan_mode =
+__ATTR(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
+
+static ssize_t
+last_sync_action_show(struct mddev *mddev, char *page)
+{
+	return sprintf(page, "%s\n", mddev->last_sync_action);
+}
+
+static struct md_sysfs_entry md_last_scan_mode = __ATTR_RO(last_sync_action);
+
 static ssize_t
 mismatch_cnt_show(struct mddev *mddev, char *page)
 {
@@ -4280,10 +4292,6 @@ mismatch_cnt_show(struct mddev *mddev, char *page)
 		       atomic64_read(&mddev->resync_mismatches));
 }
 
-static struct md_sysfs_entry md_scan_mode =
-__ATTR(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
-
-
 static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt);
 
 static ssize_t
@@ -4356,7 +4364,7 @@ sync_force_parallel_store(struct mddev *mddev, const char *buf, size_t len)
 {
 	long n;
 
-	if (strict_strtol(buf, 10, &n))
+	if (kstrtol(buf, 10, &n))
 		return -EINVAL;
 
 	if (n != 0 && n != 1)
@@ -4424,7 +4432,7 @@ static ssize_t
 min_sync_store(struct mddev *mddev, const char *buf, size_t len)
 {
 	unsigned long long min;
-	if (strict_strtoull(buf, 10, &min))
+	if (kstrtoull(buf, 10, &min))
 		return -EINVAL;
 	if (min > mddev->resync_max)
 		return -EINVAL;
@@ -4461,7 +4469,7 @@ max_sync_store(struct mddev *mddev, const char *buf, size_t len)
 		mddev->resync_max = MaxSector;
 	else {
 		unsigned long long max;
-		if (strict_strtoull(buf, 10, &max))
+		if (kstrtoull(buf, 10, &max))
 			return -EINVAL;
 		if (max < mddev->resync_min)
 			return -EINVAL;
@@ -4686,6 +4694,7 @@ static struct attribute *md_default_attrs[] = {
 
 static struct attribute *md_redundancy_attrs[] = {
 	&md_scan_mode.attr,
+	&md_last_scan_mode.attr,
 	&md_mismatches.attr,
 	&md_sync_min.attr,
 	&md_sync_max.attr,
@@ -6405,6 +6414,12 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
 		/* need to ensure md_delayed_delete() has completed */
 		flush_workqueue(md_misc_wq);
 
+	if (cmd == HOT_REMOVE_DISK)
+		/* need to ensure recovery thread has run */
+		wait_event_interruptible_timeout(mddev->sb_wait,
+						 !test_bit(MD_RECOVERY_NEEDED,
+							   &mddev->flags),
+						 msecs_to_jiffies(5000));
 	err = mddev_lock(mddev);
 	if (err) {
 		printk(KERN_INFO
@@ -7323,7 +7338,7 @@ void md_do_sync(struct md_thread *thread)
 	sector_t last_check;
 	int skipped = 0;
 	struct md_rdev *rdev;
-	char *desc;
+	char *desc, *action = NULL;
 	struct blk_plug plug;
 
 	/* just incase thread restarts... */
@@ -7333,17 +7348,21 @@ void md_do_sync(struct md_thread *thread)
 		return;
 
 	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
-		if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
+		if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
 			desc = "data-check";
-		else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
+			action = "check";
+		} else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
 			desc = "requested-resync";
-		else
+			action = "repair";
+		} else
 			desc = "resync";
 	} else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
 		desc = "reshape";
 	else
 		desc = "recovery";
 
+	mddev->last_sync_action = action ?: desc;
+
 	/* we overload curr_resync somewhat here.
 	 * 0 == not engaged in resync at all
 	 * 2 == checking that there is no conflict with another sync
@@ -7892,6 +7911,8 @@ void md_check_recovery(struct mddev *mddev)
 		md_new_event(mddev);
 	}
 unlock:
+	wake_up(&mddev->sb_wait);
+
 	if (!mddev->sync_thread) {
 		clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
 		if (test_and_clear_bit(MD_RECOVERY_RECOVER,
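
With md_last_scan_mode wired into md_redundancy_attrs, userspace can tell what
the most recent sync thread actually did, even after sync_action has returned
to "idle". A minimal reader sketch, assuming an array at md0 (not part of this
patch, error handling trimmed):

    /* Sketch: report the last sync action of md0.  Possible values, per
     * md_do_sync() above: "check", "repair", "resync", "recovery",
     * "reshape", or the initial "none". */
    #include <stdio.h>

    int main(void)
    {
        char action[32] = "unknown";
        FILE *f = fopen("/sys/block/md0/md/last_sync_action", "r");

        if (f) {
            fscanf(f, "%31s", action);
            fclose(f);
        }
        printf("last_sync_action: %s\n", action);
        return 0;
    }
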
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 653f992b687a..20f02c0b5f2d 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -268,6 +268,14 @@ struct mddev {
 
 	struct md_thread		*thread;	/* management thread */
 	struct md_thread		*sync_thread;	/* doing resync or reconstruct */
+
+	/* 'last_sync_action' is initialized to "none".  It is set when a
+	 * sync operation (i.e "data-check", "requested-resync", "resync",
+	 * "recovery", or "reshape") is started.  It holds this value even
+	 * when the sync thread is "frozen" (interrupted) or "idle" (stopped
+	 * or finished).  It is overwritten when a new sync operation is begun.
+	 */
+	char				*last_sync_action;
 	sector_t			curr_resync;	/* last block scheduled */
 	/* As resync requests can complete out of order, we cannot easily track
 	 * how much resync has been completed.  So we occasionally pause until
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index fcf65e512cf5..c4d420b7d2f4 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -597,6 +597,7 @@ static void *raid0_takeover_raid45(struct mddev *mddev)
 			       mdname(mddev));
 			return ERR_PTR(-EINVAL);
 		}
+		rdev->sectors = mddev->dev_sectors;
 	}
 
 	/* Set new parameters */
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 6e17f8181c4b..ec734588a1c6 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1519,8 +1519,9 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 	p = conf->mirrors+mirror;
 	if (!p->rdev) {
 
-		disk_stack_limits(mddev->gendisk, rdev->bdev,
-				  rdev->data_offset << 9);
+		if (mddev->gendisk)
+			disk_stack_limits(mddev->gendisk, rdev->bdev,
+					  rdev->data_offset << 9);
 
 		p->head_position = 0;
 		rdev->raid_disk = mirror;
@@ -1559,7 +1560,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 		clear_bit(Unmerged, &rdev->flags);
 	}
 	md_integrity_add_rdev(rdev, mddev);
-	if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
+	if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
 		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
 	print_conf(conf);
 	return err;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 6ddae2501b9a..cd066b63bdaf 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -97,7 +97,7 @@ static int max_queued_requests = 1024;
 
 static void allow_barrier(struct r10conf *conf);
 static void lower_barrier(struct r10conf *conf);
-static int enough(struct r10conf *conf, int ignore);
+static int _enough(struct r10conf *conf, int previous, int ignore);
 static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
 				int *skipped);
 static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio);
@@ -392,11 +392,9 @@ static void raid10_end_read_request(struct bio *bio, int error)
 		 * than fail the last device.  Here we redefine
 		 * "uptodate" to mean "Don't want to retry"
 		 */
-		unsigned long flags;
-		spin_lock_irqsave(&conf->device_lock, flags);
-		if (!enough(conf, rdev->raid_disk))
+		if (!_enough(conf, test_bit(R10BIO_Previous, &r10_bio->state),
+			     rdev->raid_disk))
 			uptodate = 1;
-		spin_unlock_irqrestore(&conf->device_lock, flags);
 	}
 	if (uptodate) {
 		raid_end_bio_io(r10_bio);
@@ -1632,37 +1630,58 @@ static void status(struct seq_file *seq, struct mddev *mddev)
  * Don't consider the device numbered 'ignore'
  * as we might be about to remove it.
  */
-static int _enough(struct r10conf *conf, struct geom *geo, int ignore)
+static int _enough(struct r10conf *conf, int previous, int ignore)
 {
 	int first = 0;
+	int has_enough = 0;
+	int disks, ncopies;
+	if (previous) {
+		disks = conf->prev.raid_disks;
+		ncopies = conf->prev.near_copies;
+	} else {
+		disks = conf->geo.raid_disks;
+		ncopies = conf->geo.near_copies;
+	}
 
+	rcu_read_lock();
 	do {
 		int n = conf->copies;
 		int cnt = 0;
 		int this = first;
 		while (n--) {
-			if (conf->mirrors[this].rdev &&
-			    this != ignore)
+			struct md_rdev *rdev;
+			if (this != ignore &&
+			    (rdev = rcu_dereference(conf->mirrors[this].rdev)) &&
+			    test_bit(In_sync, &rdev->flags))
 				cnt++;
-			this = (this+1) % geo->raid_disks;
+			this = (this+1) % disks;
 		}
 		if (cnt == 0)
-			return 0;
-		first = (first + geo->near_copies) % geo->raid_disks;
+			goto out;
+		first = (first + ncopies) % disks;
 	} while (first != 0);
-	return 1;
+	has_enough = 1;
+out:
+	rcu_read_unlock();
+	return has_enough;
 }
 
 static int enough(struct r10conf *conf, int ignore)
 {
-	return _enough(conf, &conf->geo, ignore) &&
-	       _enough(conf, &conf->prev, ignore);
+	/* when calling 'enough', both 'prev' and 'geo' must
+	 * be stable.
+	 * This is ensured if ->reconfig_mutex or ->device_lock
+	 * is held.
+	 */
+	return _enough(conf, 0, ignore) &&
+	       _enough(conf, 1, ignore);
 }
 
 static void error(struct mddev *mddev, struct md_rdev *rdev)
 {
 	char b[BDEVNAME_SIZE];
 	struct r10conf *conf = mddev->private;
+	unsigned long flags;
 
 	/*
 	 * If it is not operational, then we have already marked it as dead
@@ -1670,18 +1689,18 @@ static void error(struct mddev *mddev, struct md_rdev *rdev)
 	 * next level up know.
 	 * else mark the drive as failed
 	 */
+	spin_lock_irqsave(&conf->device_lock, flags);
 	if (test_bit(In_sync, &rdev->flags)
-	    && !enough(conf, rdev->raid_disk))
+	    && !enough(conf, rdev->raid_disk)) {
 		/*
 		 * Don't fail the drive, just return an IO error.
 		 */
+		spin_unlock_irqrestore(&conf->device_lock, flags);
 		return;
+	}
 	if (test_and_clear_bit(In_sync, &rdev->flags)) {
-		unsigned long flags;
-		spin_lock_irqsave(&conf->device_lock, flags);
 		mddev->degraded++;
-		spin_unlock_irqrestore(&conf->device_lock, flags);
-		/*
+		/*
 		 * if recovery is running, make sure it aborts.
 		 */
 		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
@@ -1689,6 +1708,7 @@
 	set_bit(Blocked, &rdev->flags);
 	set_bit(Faulty, &rdev->flags);
 	set_bit(MD_CHANGE_DEVS, &mddev->flags);
+	spin_unlock_irqrestore(&conf->device_lock, flags);
 	printk(KERN_ALERT
 	       "md/raid10:%s: Disk failure on %s, disabling device.\n"
 	       "md/raid10:%s: Operation continuing on %d devices.\n",
@@ -1791,7 +1811,7 @@
 		 * very different from resync
 		 */
 		return -EBUSY;
-	if (rdev->saved_raid_disk < 0 && !_enough(conf, &conf->prev, -1))
+	if (rdev->saved_raid_disk < 0 && !_enough(conf, 1, -1))
 		return -EINVAL;
 
 	if (rdev->raid_disk >= 0)
@@ -1819,15 +1839,17 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 			set_bit(Replacement, &rdev->flags);
 			rdev->raid_disk = mirror;
 			err = 0;
-			disk_stack_limits(mddev->gendisk, rdev->bdev,
-					  rdev->data_offset << 9);
+			if (mddev->gendisk)
+				disk_stack_limits(mddev->gendisk, rdev->bdev,
+						  rdev->data_offset << 9);
 			conf->fullsync = 1;
 			rcu_assign_pointer(p->replacement, rdev);
 			break;
 		}
 
-		disk_stack_limits(mddev->gendisk, rdev->bdev,
-				  rdev->data_offset << 9);
+		if (mddev->gendisk)
+			disk_stack_limits(mddev->gendisk, rdev->bdev,
+					  rdev->data_offset << 9);
 
 		p->head_position = 0;
 		p->recovery_disabled = mddev->recovery_disabled - 1;
@@ -2909,14 +2931,13 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
 	 */
 	if (mddev->bitmap == NULL &&
 	    mddev->recovery_cp == MaxSector &&
+	    mddev->reshape_position == MaxSector &&
+	    !test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
 	    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
+	    !test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
 	    conf->fullsync == 0) {
 		*skipped = 1;
-		max_sector = mddev->dev_sectors;
-		if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
-		    test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
-			max_sector = mddev->resync_max_sectors;
-		return max_sector - sector_nr;
+		return mddev->dev_sectors - sector_nr;
 	}
 
 skipped:
@@ -3532,7 +3553,7 @@
 
 	/* FIXME calc properly */
 	conf->mirrors = kzalloc(sizeof(struct raid10_info)*(mddev->raid_disks +
-							    max(0,mddev->delta_disks)),
+							    max(0,-mddev->delta_disks)),
 				GFP_KERNEL);
 	if (!conf->mirrors)
 		goto out;
@@ -3691,7 +3712,7 @@
 		    conf->geo.far_offset == 0)
 			goto out_free_conf;
 		if (conf->prev.far_copies != 1 &&
-		    conf->geo.far_offset == 0)
+		    conf->prev.far_offset == 0)
 			goto out_free_conf;
 	}
 
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 05e4a105b9c7..2bf094a587cb 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -4924,7 +4924,7 @@ raid5_store_stripe_cache_size(struct mddev *mddev, const char *page, size_t len)
 	if (!conf)
 		return -ENODEV;
 
-	if (strict_strtoul(page, 10, &new))
+	if (kstrtoul(page, 10, &new))
 		return -EINVAL;
 	err = raid5_set_cache_size(mddev, new);
 	if (err)
@@ -4957,7 +4957,7 @@ raid5_store_preread_threshold(struct mddev *mddev, const char *page, size_t len)
 	if (!conf)
 		return -ENODEV;
 
-	if (strict_strtoul(page, 10, &new))
+	if (kstrtoul(page, 10, &new))
 		return -EINVAL;
 	if (new > conf->max_nr_stripes)
 		return -EINVAL;
@@ -5914,7 +5914,7 @@ static int check_reshape(struct mddev *mddev)
 		return 0; /* nothing to do */
 	if (has_failed(conf))
 		return -EINVAL;
-	if (mddev->delta_disks < 0) {
+	if (mddev->delta_disks < 0 && mddev->reshape_position == MaxSector) {
 		/* We might be able to shrink, but the devices must
 		 * be made bigger first.
 		 * For raid6, 4 is the minimum size.
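
The check_reshape() change narrows the "devices too small to shrink" rejection
to the case where no reshape is under way (reshape_position == MaxSector means
none), so an interrupted shrink can resume. The predicate in isolation (names
mirror the mddev fields; values hypothetical, not kernel code):

    /* Sketch: only a freshly requested shrink must pass the size check;
     * a reshape already in progress is allowed to continue. */
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define MaxSector (~(uint64_t)0)

    static bool need_size_check(int delta_disks, uint64_t reshape_position)
    {
        return delta_disks < 0 && reshape_position == MaxSector;
    }

    int main(void)
    {
        printf("%d\n", need_size_check(-1, MaxSector)); /* 1: new shrink   */
        printf("%d\n", need_size_check(-1, 40960));     /* 0: resuming one */
        return 0;
    }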