about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2014-01-24 20:41:50 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2014-01-24 20:41:50 -0500
commit 5c85121bf618aece49155f6eea0d0b2c14c1a121 (patch)
tree 2991bd0bf74f9e5a3ad4186d64514eee1ceb90cb
parent 4d8880a0ee5b3cdf7927c6cf59a164f352e4f436 (diff)
parent 7da9d450ab2843bf1db378c156acc6304dbc1c2b (diff)
Merge tag 'md/3.14' of git://neil.brown.name/md
Pull md updates from Neil Brown:
 "All bug fixes, two tagged for -stable"

* tag 'md/3.14' of git://neil.brown.name/md:
  md/raid5: close recently introduced race in stripe_head management.
  md/raid5: fix long-standing problem with bitmap handling on write failure.
  md: check command validity early in md_ioctl().
  md: ensure metadata is written after raid level change.
  md/raid10: avoid fullsync when not necessary.
  md: allow a partially recovered device to be hot-added to an array.
  md: Change handling of save_raid_disk and metadata update during recovery.
-rw-r--r-- drivers/md/md.c 76
-rw-r--r-- drivers/md/raid10.c 3
-rw-r--r-- drivers/md/raid5.c 9
-rw-r--r-- include/uapi/linux/raid/md_p.h 6
4 files changed, 68 insertions, 26 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 369d919bdafe..40c531359a15 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1173,6 +1173,7 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
1173 desc->raid_disk < mddev->raid_disks */) { 1173 desc->raid_disk < mddev->raid_disks */) {
1174 set_bit(In_sync, &rdev->flags); 1174 set_bit(In_sync, &rdev->flags);
1175 rdev->raid_disk = desc->raid_disk; 1175 rdev->raid_disk = desc->raid_disk;
1176 rdev->saved_raid_disk = desc->raid_disk;
1176 } else if (desc->state & (1<<MD_DISK_ACTIVE)) { 1177 } else if (desc->state & (1<<MD_DISK_ACTIVE)) {
1177 /* active but not in sync implies recovery up to 1178 /* active but not in sync implies recovery up to
1178 * reshape position. We don't know exactly where 1179 * reshape position. We don't know exactly where
@@ -1671,10 +1672,14 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
1671 set_bit(Faulty, &rdev->flags); 1672 set_bit(Faulty, &rdev->flags);
1672 break; 1673 break;
1673 default: 1674 default:
1675 rdev->saved_raid_disk = role;
1674 if ((le32_to_cpu(sb->feature_map) & 1676 if ((le32_to_cpu(sb->feature_map) &
1675 MD_FEATURE_RECOVERY_OFFSET)) 1677 MD_FEATURE_RECOVERY_OFFSET)) {
1676 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset); 1678 rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
1677 else 1679 if (!(le32_to_cpu(sb->feature_map) &
1680 MD_FEATURE_RECOVERY_BITMAP))
1681 rdev->saved_raid_disk = -1;
1682 } else
1678 set_bit(In_sync, &rdev->flags); 1683 set_bit(In_sync, &rdev->flags);
1679 rdev->raid_disk = role; 1684 rdev->raid_disk = role;
1680 break; 1685 break;
@@ -1736,6 +1741,9 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
1736 cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET); 1741 cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
1737 sb->recovery_offset = 1742 sb->recovery_offset =
1738 cpu_to_le64(rdev->recovery_offset); 1743 cpu_to_le64(rdev->recovery_offset);
1744 if (rdev->saved_raid_disk >= 0 && mddev->bitmap)
1745 sb->feature_map |=
1746 cpu_to_le32(MD_FEATURE_RECOVERY_BITMAP);
1739 } 1747 }
1740 if (test_bit(Replacement, &rdev->flags)) 1748 if (test_bit(Replacement, &rdev->flags))
1741 sb->feature_map |= 1749 sb->feature_map |=
@@ -2477,8 +2485,7 @@ repeat:
2477 if (rdev->sb_loaded != 1) 2485 if (rdev->sb_loaded != 1)
2478 continue; /* no noise on spare devices */ 2486 continue; /* no noise on spare devices */
2479 2487
2480 if (!test_bit(Faulty, &rdev->flags) && 2488 if (!test_bit(Faulty, &rdev->flags)) {
2481 rdev->saved_raid_disk == -1) {
2482 md_super_write(mddev,rdev, 2489 md_super_write(mddev,rdev,
2483 rdev->sb_start, rdev->sb_size, 2490 rdev->sb_start, rdev->sb_size,
2484 rdev->sb_page); 2491 rdev->sb_page);
@@ -2494,11 +2501,9 @@ repeat:
2494 rdev->badblocks.size = 0; 2501 rdev->badblocks.size = 0;
2495 } 2502 }
2496 2503
2497 } else if (test_bit(Faulty, &rdev->flags)) 2504 } else
2498 pr_debug("md: %s (skipping faulty)\n", 2505 pr_debug("md: %s (skipping faulty)\n",
2499 bdevname(rdev->bdev, b)); 2506 bdevname(rdev->bdev, b));
2500 else
2501 pr_debug("(skipping incremental s/r ");
2502 2507
2503 if (mddev->level == LEVEL_MULTIPATH) 2508 if (mddev->level == LEVEL_MULTIPATH)
2504 /* only need to write one superblock... */ 2509 /* only need to write one superblock... */
@@ -2614,6 +2619,8 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
2614 * blocked - sets the Blocked flags 2619 * blocked - sets the Blocked flags
2615 * -blocked - clears the Blocked and possibly simulates an error 2620 * -blocked - clears the Blocked and possibly simulates an error
2616 * insync - sets Insync providing device isn't active 2621 * insync - sets Insync providing device isn't active
2622 * -insync - clear Insync for a device with a slot assigned,
2623 * so that it gets rebuilt based on bitmap
2617 * write_error - sets WriteErrorSeen 2624 * write_error - sets WriteErrorSeen
2618 * -write_error - clears WriteErrorSeen 2625 * -write_error - clears WriteErrorSeen
2619 */ 2626 */
@@ -2662,6 +2669,11 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
2662 } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) { 2669 } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
2663 set_bit(In_sync, &rdev->flags); 2670 set_bit(In_sync, &rdev->flags);
2664 err = 0; 2671 err = 0;
2672 } else if (cmd_match(buf, "-insync") && rdev->raid_disk >= 0) {
2673 clear_bit(In_sync, &rdev->flags);
2674 rdev->saved_raid_disk = rdev->raid_disk;
2675 rdev->raid_disk = -1;
2676 err = 0;
2665 } else if (cmd_match(buf, "write_error")) { 2677 } else if (cmd_match(buf, "write_error")) {
2666 set_bit(WriteErrorSeen, &rdev->flags); 2678 set_bit(WriteErrorSeen, &rdev->flags);
2667 err = 0; 2679 err = 0;
@@ -3589,6 +3601,8 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
3589 pers->run(mddev); 3601 pers->run(mddev);
3590 set_bit(MD_CHANGE_DEVS, &mddev->flags); 3602 set_bit(MD_CHANGE_DEVS, &mddev->flags);
3591 mddev_resume(mddev); 3603 mddev_resume(mddev);
3604 if (!mddev->thread)
3605 md_update_sb(mddev, 1);
3592 sysfs_notify(&mddev->kobj, NULL, "level"); 3606 sysfs_notify(&mddev->kobj, NULL, "level");
3593 md_new_event(mddev); 3607 md_new_event(mddev);
3594 return rv; 3608 return rv;
@@ -5770,6 +5784,7 @@ static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info)
5770 clear_bit(Bitmap_sync, &rdev->flags); 5784 clear_bit(Bitmap_sync, &rdev->flags);
5771 } else 5785 } else
5772 rdev->raid_disk = -1; 5786 rdev->raid_disk = -1;
5787 rdev->saved_raid_disk = rdev->raid_disk;
5773 } else 5788 } else
5774 super_types[mddev->major_version]. 5789 super_types[mddev->major_version].
5775 validate_super(mddev, rdev); 5790 validate_super(mddev, rdev);
@@ -5782,11 +5797,6 @@ static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info)
5782 return -EINVAL; 5797 return -EINVAL;
5783 } 5798 }
5784 5799
5785 if (test_bit(In_sync, &rdev->flags))
5786 rdev->saved_raid_disk = rdev->raid_disk;
5787 else
5788 rdev->saved_raid_disk = -1;
5789
5790 clear_bit(In_sync, &rdev->flags); /* just to be sure */ 5800 clear_bit(In_sync, &rdev->flags); /* just to be sure */
5791 if (info->state & (1<<MD_DISK_WRITEMOSTLY)) 5801 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
5792 set_bit(WriteMostly, &rdev->flags); 5802 set_bit(WriteMostly, &rdev->flags);
@@ -6336,6 +6346,32 @@ static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
6336 return 0; 6346 return 0;
6337} 6347}
6338 6348
6349static inline bool md_ioctl_valid(unsigned int cmd)
6350{
6351 switch (cmd) {
6352 case ADD_NEW_DISK:
6353 case BLKROSET:
6354 case GET_ARRAY_INFO:
6355 case GET_BITMAP_FILE:
6356 case GET_DISK_INFO:
6357 case HOT_ADD_DISK:
6358 case HOT_REMOVE_DISK:
6359 case PRINT_RAID_DEBUG:
6360 case RAID_AUTORUN:
6361 case RAID_VERSION:
6362 case RESTART_ARRAY_RW:
6363 case RUN_ARRAY:
6364 case SET_ARRAY_INFO:
6365 case SET_BITMAP_FILE:
6366 case SET_DISK_FAULTY:
6367 case STOP_ARRAY:
6368 case STOP_ARRAY_RO:
6369 return true;
6370 default:
6371 return false;
6372 }
6373}
6374
6339static int md_ioctl(struct block_device *bdev, fmode_t mode, 6375static int md_ioctl(struct block_device *bdev, fmode_t mode,
6340 unsigned int cmd, unsigned long arg) 6376 unsigned int cmd, unsigned long arg)
6341{ 6377{
@@ -6344,6 +6380,9 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
6344 struct mddev *mddev = NULL; 6380 struct mddev *mddev = NULL;
6345 int ro; 6381 int ro;
6346 6382
6383 if (!md_ioctl_valid(cmd))
6384 return -ENOTTY;
6385
6347 switch (cmd) { 6386 switch (cmd) {
6348 case RAID_VERSION: 6387 case RAID_VERSION:
6349 case GET_ARRAY_INFO: 6388 case GET_ARRAY_INFO:
@@ -7718,7 +7757,8 @@ static int remove_and_add_spares(struct mddev *mddev,
7718 !test_bit(Bitmap_sync, &rdev->flags))) 7757 !test_bit(Bitmap_sync, &rdev->flags)))
7719 continue; 7758 continue;
7720 7759
7721 rdev->recovery_offset = 0; 7760 if (rdev->saved_raid_disk < 0)
7761 rdev->recovery_offset = 0;
7722 if (mddev->pers-> 7762 if (mddev->pers->
7723 hot_add_disk(mddev, rdev) == 0) { 7763 hot_add_disk(mddev, rdev) == 0) {
7724 if (sysfs_link_rdev(mddev, rdev)) 7764 if (sysfs_link_rdev(mddev, rdev))
@@ -7938,14 +7978,10 @@ void md_reap_sync_thread(struct mddev *mddev)
7938 mddev->pers->finish_reshape(mddev); 7978 mddev->pers->finish_reshape(mddev);
7939 7979
7940 /* If array is no-longer degraded, then any saved_raid_disk 7980 /* If array is no-longer degraded, then any saved_raid_disk
7941 * information must be scrapped. Also if any device is now 7981 * information must be scrapped.
7942 * In_sync we must scrape the saved_raid_disk for that device
7943 * do the superblock for an incrementally recovered device
7944 * written out.
7945 */ 7982 */
7946 rdev_for_each(rdev, mddev) 7983 if (!mddev->degraded)
7947 if (!mddev->degraded || 7984 rdev_for_each(rdev, mddev)
7948 test_bit(In_sync, &rdev->flags))
7949 rdev->saved_raid_disk = -1; 7985 rdev->saved_raid_disk = -1;
7950 7986
7951 md_update_sb(mddev, 1); 7987 md_update_sb(mddev, 1);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 06eeb99ea6fc..8d39d63281b9 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -3747,7 +3747,8 @@ static int run(struct mddev *mddev)
3747 !test_bit(In_sync, &disk->rdev->flags)) { 3747 !test_bit(In_sync, &disk->rdev->flags)) {
3748 disk->head_position = 0; 3748 disk->head_position = 0;
3749 mddev->degraded++; 3749 mddev->degraded++;
3750 if (disk->rdev) 3750 if (disk->rdev &&
3751 disk->rdev->saved_raid_disk < 0)
3751 conf->fullsync = 1; 3752 conf->fullsync = 1;
3752 } 3753 }
3753 disk->recovery_disabled = mddev->recovery_disabled - 1; 3754 disk->recovery_disabled = mddev->recovery_disabled - 1;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index cbb15716a5db..03f82ab87d9e 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -675,8 +675,10 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
675 || !conf->inactive_blocked), 675 || !conf->inactive_blocked),
676 *(conf->hash_locks + hash)); 676 *(conf->hash_locks + hash));
677 conf->inactive_blocked = 0; 677 conf->inactive_blocked = 0;
678 } else 678 } else {
679 init_stripe(sh, sector, previous); 679 init_stripe(sh, sector, previous);
680 atomic_inc(&sh->count);
681 }
680 } else { 682 } else {
681 spin_lock(&conf->device_lock); 683 spin_lock(&conf->device_lock);
682 if (atomic_read(&sh->count)) { 684 if (atomic_read(&sh->count)) {
@@ -695,13 +697,11 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
695 sh->group = NULL; 697 sh->group = NULL;
696 } 698 }
697 } 699 }
700 atomic_inc(&sh->count);
698 spin_unlock(&conf->device_lock); 701 spin_unlock(&conf->device_lock);
699 } 702 }
700 } while (sh == NULL); 703 } while (sh == NULL);
701 704
702 if (sh)
703 atomic_inc(&sh->count);
704
705 spin_unlock_irq(conf->hash_locks + hash); 705 spin_unlock_irq(conf->hash_locks + hash);
706 return sh; 706 return sh;
707} 707}
@@ -2111,6 +2111,7 @@ static void raid5_end_write_request(struct bio *bi, int error)
2111 set_bit(R5_MadeGoodRepl, &sh->dev[i].flags); 2111 set_bit(R5_MadeGoodRepl, &sh->dev[i].flags);
2112 } else { 2112 } else {
2113 if (!uptodate) { 2113 if (!uptodate) {
2114 set_bit(STRIPE_DEGRADED, &sh->state);
2114 set_bit(WriteErrorSeen, &rdev->flags); 2115 set_bit(WriteErrorSeen, &rdev->flags);
2115 set_bit(R5_WriteError, &sh->dev[i].flags); 2116 set_bit(R5_WriteError, &sh->dev[i].flags);
2116 if (!test_and_set_bit(WantReplacement, &rdev->flags)) 2117 if (!test_and_set_bit(WantReplacement, &rdev->flags))
diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h
index f7cf7f351144..49f4210d4394 100644
--- a/include/uapi/linux/raid/md_p.h
+++ b/include/uapi/linux/raid/md_p.h
@@ -292,6 +292,9 @@ struct mdp_superblock_1 {
292 * backwards anyway. 292 * backwards anyway.
293 */ 293 */
294#define MD_FEATURE_NEW_OFFSET 64 /* new_offset must be honoured */ 294#define MD_FEATURE_NEW_OFFSET 64 /* new_offset must be honoured */
295#define MD_FEATURE_RECOVERY_BITMAP 128 /* recovery that is happening
296 * is guided by bitmap.
297 */
295#define MD_FEATURE_ALL (MD_FEATURE_BITMAP_OFFSET \ 298#define MD_FEATURE_ALL (MD_FEATURE_BITMAP_OFFSET \
296 |MD_FEATURE_RECOVERY_OFFSET \ 299 |MD_FEATURE_RECOVERY_OFFSET \
297 |MD_FEATURE_RESHAPE_ACTIVE \ 300 |MD_FEATURE_RESHAPE_ACTIVE \
@@ -299,6 +302,7 @@ struct mdp_superblock_1 {
299 |MD_FEATURE_REPLACEMENT \ 302 |MD_FEATURE_REPLACEMENT \
300 |MD_FEATURE_RESHAPE_BACKWARDS \ 303 |MD_FEATURE_RESHAPE_BACKWARDS \
301 |MD_FEATURE_NEW_OFFSET \ 304 |MD_FEATURE_NEW_OFFSET \
305 |MD_FEATURE_RECOVERY_BITMAP \
302 ) 306 )
303 307
304#endif 308#endif