diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-01-24 20:41:50 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-01-24 20:41:50 -0500 |
commit | 5c85121bf618aece49155f6eea0d0b2c14c1a121 (patch) | |
tree | 2991bd0bf74f9e5a3ad4186d64514eee1ceb90cb | |
parent | 4d8880a0ee5b3cdf7927c6cf59a164f352e4f436 (diff) | |
parent | 7da9d450ab2843bf1db378c156acc6304dbc1c2b (diff) |
Merge tag 'md/3.14' of git://neil.brown.name/md
Pull md updates from Neil Brown:
"All bug fixes, two tagged for -stable"
* tag 'md/3.14' of git://neil.brown.name/md:
md/raid5: close recently introduced race in stripe_head management.
md/raid5: fix long-standing problem with bitmap handling on write failure.
md: check command validity early in md_ioctl().
md: ensure metadata is written after raid level change.
md/raid10: avoid fullsync when not necessary.
md: allow a partially recovered device to be hot-added to an array.
md: Change handling of save_raid_disk and metadata update during recovery.
-rw-r--r-- | drivers/md/md.c | 76 | ||||
-rw-r--r-- | drivers/md/raid10.c | 3 | ||||
-rw-r--r-- | drivers/md/raid5.c | 9 | ||||
-rw-r--r-- | include/uapi/linux/raid/md_p.h | 6 |
4 files changed, 68 insertions, 26 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index 369d919bdafe..40c531359a15 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -1173,6 +1173,7 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev) | |||
1173 | desc->raid_disk < mddev->raid_disks */) { | 1173 | desc->raid_disk < mddev->raid_disks */) { |
1174 | set_bit(In_sync, &rdev->flags); | 1174 | set_bit(In_sync, &rdev->flags); |
1175 | rdev->raid_disk = desc->raid_disk; | 1175 | rdev->raid_disk = desc->raid_disk; |
1176 | rdev->saved_raid_disk = desc->raid_disk; | ||
1176 | } else if (desc->state & (1<<MD_DISK_ACTIVE)) { | 1177 | } else if (desc->state & (1<<MD_DISK_ACTIVE)) { |
1177 | /* active but not in sync implies recovery up to | 1178 | /* active but not in sync implies recovery up to |
1178 | * reshape position. We don't know exactly where | 1179 | * reshape position. We don't know exactly where |
@@ -1671,10 +1672,14 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) | |||
1671 | set_bit(Faulty, &rdev->flags); | 1672 | set_bit(Faulty, &rdev->flags); |
1672 | break; | 1673 | break; |
1673 | default: | 1674 | default: |
1675 | rdev->saved_raid_disk = role; | ||
1674 | if ((le32_to_cpu(sb->feature_map) & | 1676 | if ((le32_to_cpu(sb->feature_map) & |
1675 | MD_FEATURE_RECOVERY_OFFSET)) | 1677 | MD_FEATURE_RECOVERY_OFFSET)) { |
1676 | rdev->recovery_offset = le64_to_cpu(sb->recovery_offset); | 1678 | rdev->recovery_offset = le64_to_cpu(sb->recovery_offset); |
1677 | else | 1679 | if (!(le32_to_cpu(sb->feature_map) & |
1680 | MD_FEATURE_RECOVERY_BITMAP)) | ||
1681 | rdev->saved_raid_disk = -1; | ||
1682 | } else | ||
1678 | set_bit(In_sync, &rdev->flags); | 1683 | set_bit(In_sync, &rdev->flags); |
1679 | rdev->raid_disk = role; | 1684 | rdev->raid_disk = role; |
1680 | break; | 1685 | break; |
@@ -1736,6 +1741,9 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev) | |||
1736 | cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET); | 1741 | cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET); |
1737 | sb->recovery_offset = | 1742 | sb->recovery_offset = |
1738 | cpu_to_le64(rdev->recovery_offset); | 1743 | cpu_to_le64(rdev->recovery_offset); |
1744 | if (rdev->saved_raid_disk >= 0 && mddev->bitmap) | ||
1745 | sb->feature_map |= | ||
1746 | cpu_to_le32(MD_FEATURE_RECOVERY_BITMAP); | ||
1739 | } | 1747 | } |
1740 | if (test_bit(Replacement, &rdev->flags)) | 1748 | if (test_bit(Replacement, &rdev->flags)) |
1741 | sb->feature_map |= | 1749 | sb->feature_map |= |
@@ -2477,8 +2485,7 @@ repeat: | |||
2477 | if (rdev->sb_loaded != 1) | 2485 | if (rdev->sb_loaded != 1) |
2478 | continue; /* no noise on spare devices */ | 2486 | continue; /* no noise on spare devices */ |
2479 | 2487 | ||
2480 | if (!test_bit(Faulty, &rdev->flags) && | 2488 | if (!test_bit(Faulty, &rdev->flags)) { |
2481 | rdev->saved_raid_disk == -1) { | ||
2482 | md_super_write(mddev,rdev, | 2489 | md_super_write(mddev,rdev, |
2483 | rdev->sb_start, rdev->sb_size, | 2490 | rdev->sb_start, rdev->sb_size, |
2484 | rdev->sb_page); | 2491 | rdev->sb_page); |
@@ -2494,11 +2501,9 @@ repeat: | |||
2494 | rdev->badblocks.size = 0; | 2501 | rdev->badblocks.size = 0; |
2495 | } | 2502 | } |
2496 | 2503 | ||
2497 | } else if (test_bit(Faulty, &rdev->flags)) | 2504 | } else |
2498 | pr_debug("md: %s (skipping faulty)\n", | 2505 | pr_debug("md: %s (skipping faulty)\n", |
2499 | bdevname(rdev->bdev, b)); | 2506 | bdevname(rdev->bdev, b)); |
2500 | else | ||
2501 | pr_debug("(skipping incremental s/r "); | ||
2502 | 2507 | ||
2503 | if (mddev->level == LEVEL_MULTIPATH) | 2508 | if (mddev->level == LEVEL_MULTIPATH) |
2504 | /* only need to write one superblock... */ | 2509 | /* only need to write one superblock... */ |
@@ -2614,6 +2619,8 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) | |||
2614 | * blocked - sets the Blocked flags | 2619 | * blocked - sets the Blocked flags |
2615 | * -blocked - clears the Blocked and possibly simulates an error | 2620 | * -blocked - clears the Blocked and possibly simulates an error |
2616 | * insync - sets Insync providing device isn't active | 2621 | * insync - sets Insync providing device isn't active |
2622 | * -insync - clear Insync for a device with a slot assigned, | ||
2623 | * so that it gets rebuilt based on bitmap | ||
2617 | * write_error - sets WriteErrorSeen | 2624 | * write_error - sets WriteErrorSeen |
2618 | * -write_error - clears WriteErrorSeen | 2625 | * -write_error - clears WriteErrorSeen |
2619 | */ | 2626 | */ |
@@ -2662,6 +2669,11 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) | |||
2662 | } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) { | 2669 | } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) { |
2663 | set_bit(In_sync, &rdev->flags); | 2670 | set_bit(In_sync, &rdev->flags); |
2664 | err = 0; | 2671 | err = 0; |
2672 | } else if (cmd_match(buf, "-insync") && rdev->raid_disk >= 0) { | ||
2673 | clear_bit(In_sync, &rdev->flags); | ||
2674 | rdev->saved_raid_disk = rdev->raid_disk; | ||
2675 | rdev->raid_disk = -1; | ||
2676 | err = 0; | ||
2665 | } else if (cmd_match(buf, "write_error")) { | 2677 | } else if (cmd_match(buf, "write_error")) { |
2666 | set_bit(WriteErrorSeen, &rdev->flags); | 2678 | set_bit(WriteErrorSeen, &rdev->flags); |
2667 | err = 0; | 2679 | err = 0; |
@@ -3589,6 +3601,8 @@ level_store(struct mddev *mddev, const char *buf, size_t len) | |||
3589 | pers->run(mddev); | 3601 | pers->run(mddev); |
3590 | set_bit(MD_CHANGE_DEVS, &mddev->flags); | 3602 | set_bit(MD_CHANGE_DEVS, &mddev->flags); |
3591 | mddev_resume(mddev); | 3603 | mddev_resume(mddev); |
3604 | if (!mddev->thread) | ||
3605 | md_update_sb(mddev, 1); | ||
3592 | sysfs_notify(&mddev->kobj, NULL, "level"); | 3606 | sysfs_notify(&mddev->kobj, NULL, "level"); |
3593 | md_new_event(mddev); | 3607 | md_new_event(mddev); |
3594 | return rv; | 3608 | return rv; |
@@ -5770,6 +5784,7 @@ static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info) | |||
5770 | clear_bit(Bitmap_sync, &rdev->flags); | 5784 | clear_bit(Bitmap_sync, &rdev->flags); |
5771 | } else | 5785 | } else |
5772 | rdev->raid_disk = -1; | 5786 | rdev->raid_disk = -1; |
5787 | rdev->saved_raid_disk = rdev->raid_disk; | ||
5773 | } else | 5788 | } else |
5774 | super_types[mddev->major_version]. | 5789 | super_types[mddev->major_version]. |
5775 | validate_super(mddev, rdev); | 5790 | validate_super(mddev, rdev); |
@@ -5782,11 +5797,6 @@ static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info) | |||
5782 | return -EINVAL; | 5797 | return -EINVAL; |
5783 | } | 5798 | } |
5784 | 5799 | ||
5785 | if (test_bit(In_sync, &rdev->flags)) | ||
5786 | rdev->saved_raid_disk = rdev->raid_disk; | ||
5787 | else | ||
5788 | rdev->saved_raid_disk = -1; | ||
5789 | |||
5790 | clear_bit(In_sync, &rdev->flags); /* just to be sure */ | 5800 | clear_bit(In_sync, &rdev->flags); /* just to be sure */ |
5791 | if (info->state & (1<<MD_DISK_WRITEMOSTLY)) | 5801 | if (info->state & (1<<MD_DISK_WRITEMOSTLY)) |
5792 | set_bit(WriteMostly, &rdev->flags); | 5802 | set_bit(WriteMostly, &rdev->flags); |
@@ -6336,6 +6346,32 @@ static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo) | |||
6336 | return 0; | 6346 | return 0; |
6337 | } | 6347 | } |
6338 | 6348 | ||
6349 | static inline bool md_ioctl_valid(unsigned int cmd) | ||
6350 | { | ||
6351 | switch (cmd) { | ||
6352 | case ADD_NEW_DISK: | ||
6353 | case BLKROSET: | ||
6354 | case GET_ARRAY_INFO: | ||
6355 | case GET_BITMAP_FILE: | ||
6356 | case GET_DISK_INFO: | ||
6357 | case HOT_ADD_DISK: | ||
6358 | case HOT_REMOVE_DISK: | ||
6359 | case PRINT_RAID_DEBUG: | ||
6360 | case RAID_AUTORUN: | ||
6361 | case RAID_VERSION: | ||
6362 | case RESTART_ARRAY_RW: | ||
6363 | case RUN_ARRAY: | ||
6364 | case SET_ARRAY_INFO: | ||
6365 | case SET_BITMAP_FILE: | ||
6366 | case SET_DISK_FAULTY: | ||
6367 | case STOP_ARRAY: | ||
6368 | case STOP_ARRAY_RO: | ||
6369 | return true; | ||
6370 | default: | ||
6371 | return false; | ||
6372 | } | ||
6373 | } | ||
6374 | |||
6339 | static int md_ioctl(struct block_device *bdev, fmode_t mode, | 6375 | static int md_ioctl(struct block_device *bdev, fmode_t mode, |
6340 | unsigned int cmd, unsigned long arg) | 6376 | unsigned int cmd, unsigned long arg) |
6341 | { | 6377 | { |
@@ -6344,6 +6380,9 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, | |||
6344 | struct mddev *mddev = NULL; | 6380 | struct mddev *mddev = NULL; |
6345 | int ro; | 6381 | int ro; |
6346 | 6382 | ||
6383 | if (!md_ioctl_valid(cmd)) | ||
6384 | return -ENOTTY; | ||
6385 | |||
6347 | switch (cmd) { | 6386 | switch (cmd) { |
6348 | case RAID_VERSION: | 6387 | case RAID_VERSION: |
6349 | case GET_ARRAY_INFO: | 6388 | case GET_ARRAY_INFO: |
@@ -7718,7 +7757,8 @@ static int remove_and_add_spares(struct mddev *mddev, | |||
7718 | !test_bit(Bitmap_sync, &rdev->flags))) | 7757 | !test_bit(Bitmap_sync, &rdev->flags))) |
7719 | continue; | 7758 | continue; |
7720 | 7759 | ||
7721 | rdev->recovery_offset = 0; | 7760 | if (rdev->saved_raid_disk < 0) |
7761 | rdev->recovery_offset = 0; | ||
7722 | if (mddev->pers-> | 7762 | if (mddev->pers-> |
7723 | hot_add_disk(mddev, rdev) == 0) { | 7763 | hot_add_disk(mddev, rdev) == 0) { |
7724 | if (sysfs_link_rdev(mddev, rdev)) | 7764 | if (sysfs_link_rdev(mddev, rdev)) |
@@ -7938,14 +7978,10 @@ void md_reap_sync_thread(struct mddev *mddev) | |||
7938 | mddev->pers->finish_reshape(mddev); | 7978 | mddev->pers->finish_reshape(mddev); |
7939 | 7979 | ||
7940 | /* If array is no-longer degraded, then any saved_raid_disk | 7980 | /* If array is no-longer degraded, then any saved_raid_disk |
7941 | * information must be scrapped. Also if any device is now | 7981 | * information must be scrapped. |
7942 | * In_sync we must scrape the saved_raid_disk for that device | ||
7943 | * do the superblock for an incrementally recovered device | ||
7944 | * written out. | ||
7945 | */ | 7982 | */ |
7946 | rdev_for_each(rdev, mddev) | 7983 | if (!mddev->degraded) |
7947 | if (!mddev->degraded || | 7984 | rdev_for_each(rdev, mddev) |
7948 | test_bit(In_sync, &rdev->flags)) | ||
7949 | rdev->saved_raid_disk = -1; | 7985 | rdev->saved_raid_disk = -1; |
7950 | 7986 | ||
7951 | md_update_sb(mddev, 1); | 7987 | md_update_sb(mddev, 1); |
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 06eeb99ea6fc..8d39d63281b9 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -3747,7 +3747,8 @@ static int run(struct mddev *mddev) | |||
3747 | !test_bit(In_sync, &disk->rdev->flags)) { | 3747 | !test_bit(In_sync, &disk->rdev->flags)) { |
3748 | disk->head_position = 0; | 3748 | disk->head_position = 0; |
3749 | mddev->degraded++; | 3749 | mddev->degraded++; |
3750 | if (disk->rdev) | 3750 | if (disk->rdev && |
3751 | disk->rdev->saved_raid_disk < 0) | ||
3751 | conf->fullsync = 1; | 3752 | conf->fullsync = 1; |
3752 | } | 3753 | } |
3753 | disk->recovery_disabled = mddev->recovery_disabled - 1; | 3754 | disk->recovery_disabled = mddev->recovery_disabled - 1; |
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index cbb15716a5db..03f82ab87d9e 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -675,8 +675,10 @@ get_active_stripe(struct r5conf *conf, sector_t sector, | |||
675 | || !conf->inactive_blocked), | 675 | || !conf->inactive_blocked), |
676 | *(conf->hash_locks + hash)); | 676 | *(conf->hash_locks + hash)); |
677 | conf->inactive_blocked = 0; | 677 | conf->inactive_blocked = 0; |
678 | } else | 678 | } else { |
679 | init_stripe(sh, sector, previous); | 679 | init_stripe(sh, sector, previous); |
680 | atomic_inc(&sh->count); | ||
681 | } | ||
680 | } else { | 682 | } else { |
681 | spin_lock(&conf->device_lock); | 683 | spin_lock(&conf->device_lock); |
682 | if (atomic_read(&sh->count)) { | 684 | if (atomic_read(&sh->count)) { |
@@ -695,13 +697,11 @@ get_active_stripe(struct r5conf *conf, sector_t sector, | |||
695 | sh->group = NULL; | 697 | sh->group = NULL; |
696 | } | 698 | } |
697 | } | 699 | } |
700 | atomic_inc(&sh->count); | ||
698 | spin_unlock(&conf->device_lock); | 701 | spin_unlock(&conf->device_lock); |
699 | } | 702 | } |
700 | } while (sh == NULL); | 703 | } while (sh == NULL); |
701 | 704 | ||
702 | if (sh) | ||
703 | atomic_inc(&sh->count); | ||
704 | |||
705 | spin_unlock_irq(conf->hash_locks + hash); | 705 | spin_unlock_irq(conf->hash_locks + hash); |
706 | return sh; | 706 | return sh; |
707 | } | 707 | } |
@@ -2111,6 +2111,7 @@ static void raid5_end_write_request(struct bio *bi, int error) | |||
2111 | set_bit(R5_MadeGoodRepl, &sh->dev[i].flags); | 2111 | set_bit(R5_MadeGoodRepl, &sh->dev[i].flags); |
2112 | } else { | 2112 | } else { |
2113 | if (!uptodate) { | 2113 | if (!uptodate) { |
2114 | set_bit(STRIPE_DEGRADED, &sh->state); | ||
2114 | set_bit(WriteErrorSeen, &rdev->flags); | 2115 | set_bit(WriteErrorSeen, &rdev->flags); |
2115 | set_bit(R5_WriteError, &sh->dev[i].flags); | 2116 | set_bit(R5_WriteError, &sh->dev[i].flags); |
2116 | if (!test_and_set_bit(WantReplacement, &rdev->flags)) | 2117 | if (!test_and_set_bit(WantReplacement, &rdev->flags)) |
diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h index f7cf7f351144..49f4210d4394 100644 --- a/include/uapi/linux/raid/md_p.h +++ b/include/uapi/linux/raid/md_p.h | |||
@@ -292,6 +292,9 @@ struct mdp_superblock_1 { | |||
292 | * backwards anyway. | 292 | * backwards anyway. |
293 | */ | 293 | */ |
294 | #define MD_FEATURE_NEW_OFFSET 64 /* new_offset must be honoured */ | 294 | #define MD_FEATURE_NEW_OFFSET 64 /* new_offset must be honoured */ |
295 | #define MD_FEATURE_RECOVERY_BITMAP 128 /* recovery that is happening | ||
296 | * is guided by bitmap. | ||
297 | */ | ||
295 | #define MD_FEATURE_ALL (MD_FEATURE_BITMAP_OFFSET \ | 298 | #define MD_FEATURE_ALL (MD_FEATURE_BITMAP_OFFSET \ |
296 | |MD_FEATURE_RECOVERY_OFFSET \ | 299 | |MD_FEATURE_RECOVERY_OFFSET \ |
297 | |MD_FEATURE_RESHAPE_ACTIVE \ | 300 | |MD_FEATURE_RESHAPE_ACTIVE \ |
@@ -299,6 +302,7 @@ struct mdp_superblock_1 { | |||
299 | |MD_FEATURE_REPLACEMENT \ | 302 | |MD_FEATURE_REPLACEMENT \ |
300 | |MD_FEATURE_RESHAPE_BACKWARDS \ | 303 | |MD_FEATURE_RESHAPE_BACKWARDS \ |
301 | |MD_FEATURE_NEW_OFFSET \ | 304 | |MD_FEATURE_NEW_OFFSET \ |
305 | |MD_FEATURE_RECOVERY_BITMAP \ | ||
302 | ) | 306 | ) |
303 | 307 | ||
304 | #endif | 308 | #endif |