diff options
author | NeilBrown <neilb@suse.de> | 2008-02-06 04:39:52 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2008-02-06 13:41:18 -0500 |
commit | c620727779f7cc8ea96efb71f0651a26349e59c1 (patch) | |
tree | 777abdad9c9ef10cb4df5c0efc736e6c64851ed8 | |
parent | c303da6d713b87b7b3f999f5acce8ecc76ff1adb (diff) |
md: allow a maximum extent to be set for resyncing
This allows userspace to control resync/reshape progress and synchronise it
with other activities, such as shared access in a SAN, or backing up critical
sections during a tricky reshape.
Writing a number of sectors (which must be a multiple of the chunk size if
such is meaningful) to the new 'sync_max' sysfs attribute causes a resync to
pause when it gets to that point.
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | Documentation/md.txt | 10 | ||||
-rw-r--r-- | drivers/md/md.c | 75 | ||||
-rw-r--r-- | drivers/md/raid1.c | 2 | ||||
-rw-r--r-- | drivers/md/raid10.c | 3 | ||||
-rw-r--r-- | drivers/md/raid5.c | 25 | ||||
-rw-r--r-- | include/linux/raid/md_k.h | 2 |
6 files changed, 107 insertions, 10 deletions
diff --git a/Documentation/md.txt b/Documentation/md.txt index 5818628207b5..396cdd982c26 100644 --- a/Documentation/md.txt +++ b/Documentation/md.txt | |||
@@ -416,6 +416,16 @@ also have | |||
416 | sectors in total that could need to be processed. The two | 416 | sectors in total that could need to be processed. The two |
417 | numbers are separated by a '/' thus effectively showing one | 417 | numbers are separated by a '/' thus effectively showing one |
418 | value, a fraction of the process that is complete. | 418 | value, a fraction of the process that is complete. |
419 | A 'select' on this attribute will return when resync completes, | ||
420 | when it reaches the current sync_max (below) and possibly at | ||
421 | other times. | ||
422 | |||
423 | sync_max | ||
424 | This is a number of sectors at which point a resync/recovery | ||
425 | process will pause. When a resync is active, the value can | ||
426 | only ever be increased, never decreased. The value of 'max' | ||
427 | effectively disables the limit. | ||
428 | |||
419 | 429 | ||
420 | sync_speed | 430 | sync_speed |
421 | This shows the current actual speed, in K/sec, of the current | 431 | This shows the current actual speed, in K/sec, of the current |
diff --git a/drivers/md/md.c b/drivers/md/md.c index 00788c56276f..79eb63fdb4b3 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -275,6 +275,7 @@ static mddev_t * mddev_find(dev_t unit) | |||
275 | spin_lock_init(&new->write_lock); | 275 | spin_lock_init(&new->write_lock); |
276 | init_waitqueue_head(&new->sb_wait); | 276 | init_waitqueue_head(&new->sb_wait); |
277 | new->reshape_position = MaxSector; | 277 | new->reshape_position = MaxSector; |
278 | new->resync_max = MaxSector; | ||
278 | 279 | ||
279 | new->queue = blk_alloc_queue(GFP_KERNEL); | 280 | new->queue = blk_alloc_queue(GFP_KERNEL); |
280 | if (!new->queue) { | 281 | if (!new->queue) { |
@@ -2921,6 +2922,43 @@ sync_completed_show(mddev_t *mddev, char *page) | |||
2921 | static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed); | 2922 | static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed); |
2922 | 2923 | ||
2923 | static ssize_t | 2924 | static ssize_t |
2925 | max_sync_show(mddev_t *mddev, char *page) | ||
2926 | { /* sysfs show handler: formats mddev->resync_max into page and returns sprintf's result */ | ||
2927 | if (mddev->resync_max == MaxSector) | ||
2928 | return sprintf(page, "max\n"); /* MaxSector is reported as the word "max" */ | ||
2929 | else | ||
2930 | return sprintf(page, "%llu\n", | ||
2931 | (unsigned long long)mddev->resync_max); /* otherwise the raw value in decimal */ | ||
2932 | } | ||
2933 | static ssize_t | ||
2934 | max_sync_store(mddev_t *mddev, const char *buf, size_t len) | ||
2935 | { /* sysfs store handler: sets mddev->resync_max from buf; returns len, or -EINVAL / -EBUSY */ | ||
2936 | if (strncmp(buf, "max", 3) == 0) /* only the first 3 bytes are compared, so any input starting with "max" matches */ | ||
2937 | mddev->resync_max = MaxSector; | ||
2938 | else { | ||
2939 | char *ep; | ||
2940 | unsigned long long max = simple_strtoull(buf, &ep, 10); | ||
2941 | if (ep == buf || (*ep != 0 && *ep != '\n')) /* reject empty input, or anything after the number other than one newline */ | ||
2942 | return -EINVAL; | ||
2943 | if (max < mddev->resync_max && | ||
2944 | test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) | ||
2945 | return -EBUSY; /* the limit may not be lowered while MD_RECOVERY_RUNNING is set */ | ||
2946 | |||
2947 | /* Must be a multiple of chunk_size (chunk_size>>9 is compared against a sector count). NOTE(review): the mask test below equals a multiple-of check only if chunk_size>>9 is a power of two - confirm */ | ||
2948 | if (mddev->chunk_size) { | ||
2949 | if (max & (sector_t)((mddev->chunk_size>>9)-1)) | ||
2950 | return -EINVAL; | ||
2951 | } | ||
2952 | mddev->resync_max = max; | ||
2953 | } | ||
2954 | wake_up(&mddev->recovery_wait); /* wake waiters on recovery_wait so they re-test their condition against the new resync_max */ | ||
2955 | return len; | ||
2956 | } | ||
2957 | |||
2958 | static struct md_sysfs_entry md_max_sync = | ||
2959 | __ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store); /* sysfs attribute "sync_max", wired to the show/store handlers above; NOTE(review): a separate md_sync_max entry also appears in md_redundancy_attrs - do not confuse the two */ | ||
2960 | |||
2961 | static ssize_t | ||
2924 | suspend_lo_show(mddev_t *mddev, char *page) | 2962 | suspend_lo_show(mddev_t *mddev, char *page) |
2925 | { | 2963 | { |
2926 | return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo); | 2964 | return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo); |
@@ -3030,6 +3068,7 @@ static struct attribute *md_redundancy_attrs[] = { | |||
3030 | &md_sync_max.attr, | 3068 | &md_sync_max.attr, |
3031 | &md_sync_speed.attr, | 3069 | &md_sync_speed.attr, |
3032 | &md_sync_completed.attr, | 3070 | &md_sync_completed.attr, |
3071 | &md_max_sync.attr, | ||
3033 | &md_suspend_lo.attr, | 3072 | &md_suspend_lo.attr, |
3034 | &md_suspend_hi.attr, | 3073 | &md_suspend_hi.attr, |
3035 | &md_bitmap.attr, | 3074 | &md_bitmap.attr, |
@@ -3579,6 +3618,7 @@ static int do_md_stop(mddev_t * mddev, int mode) | |||
3579 | mddev->size = 0; | 3618 | mddev->size = 0; |
3580 | mddev->raid_disks = 0; | 3619 | mddev->raid_disks = 0; |
3581 | mddev->recovery_cp = 0; | 3620 | mddev->recovery_cp = 0; |
3621 | mddev->resync_max = MaxSector; | ||
3582 | mddev->reshape_position = MaxSector; | 3622 | mddev->reshape_position = MaxSector; |
3583 | mddev->external = 0; | 3623 | mddev->external = 0; |
3584 | 3624 | ||
@@ -5443,8 +5483,16 @@ void md_do_sync(mddev_t *mddev) | |||
5443 | sector_t sectors; | 5483 | sector_t sectors; |
5444 | 5484 | ||
5445 | skipped = 0; | 5485 | skipped = 0; |
5486 | if (j >= mddev->resync_max) { | ||
5487 | sysfs_notify(&mddev->kobj, NULL, "sync_completed"); | ||
5488 | wait_event(mddev->recovery_wait, | ||
5489 | mddev->resync_max > j | ||
5490 | || kthread_should_stop()); | ||
5491 | } | ||
5492 | if (kthread_should_stop()) | ||
5493 | goto interrupted; | ||
5446 | sectors = mddev->pers->sync_request(mddev, j, &skipped, | 5494 | sectors = mddev->pers->sync_request(mddev, j, &skipped, |
5447 | currspeed < speed_min(mddev)); | 5495 | currspeed < speed_min(mddev)); |
5448 | if (sectors == 0) { | 5496 | if (sectors == 0) { |
5449 | set_bit(MD_RECOVERY_ERR, &mddev->recovery); | 5497 | set_bit(MD_RECOVERY_ERR, &mddev->recovery); |
5450 | goto out; | 5498 | goto out; |
@@ -5486,15 +5534,9 @@ void md_do_sync(mddev_t *mddev) | |||
5486 | } | 5534 | } |
5487 | 5535 | ||
5488 | 5536 | ||
5489 | if (kthread_should_stop()) { | 5537 | if (kthread_should_stop()) |
5490 | /* | 5538 | goto interrupted; |
5491 | * got a signal, exit. | 5539 | |
5492 | */ | ||
5493 | printk(KERN_INFO | ||
5494 | "md: md_do_sync() got signal ... exiting\n"); | ||
5495 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); | ||
5496 | goto out; | ||
5497 | } | ||
5498 | 5540 | ||
5499 | /* | 5541 | /* |
5500 | * this loop exits only if either when we are slower than | 5542 | * this loop exits only if either when we are slower than |
@@ -5558,9 +5600,22 @@ void md_do_sync(mddev_t *mddev) | |||
5558 | 5600 | ||
5559 | skip: | 5601 | skip: |
5560 | mddev->curr_resync = 0; | 5602 | mddev->curr_resync = 0; |
5603 | mddev->resync_max = MaxSector; | ||
5604 | sysfs_notify(&mddev->kobj, NULL, "sync_completed"); | ||
5561 | wake_up(&resync_wait); | 5605 | wake_up(&resync_wait); |
5562 | set_bit(MD_RECOVERY_DONE, &mddev->recovery); | 5606 | set_bit(MD_RECOVERY_DONE, &mddev->recovery); |
5563 | md_wakeup_thread(mddev->thread); | 5607 | md_wakeup_thread(mddev->thread); |
5608 | return; | ||
5609 | |||
5610 | interrupted: | ||
5611 | /* | ||
5612 | * got a signal, exit. | ||
5613 | */ | ||
5614 | printk(KERN_INFO | ||
5615 | "md: md_do_sync() got signal ... exiting\n"); | ||
5616 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); | ||
5617 | goto out; | ||
5618 | |||
5564 | } | 5619 | } |
5565 | EXPORT_SYMBOL_GPL(md_do_sync); | 5620 | EXPORT_SYMBOL_GPL(md_do_sync); |
5566 | 5621 | ||
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index e0b8d0dd7a87..ae7c15207df5 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
@@ -1767,6 +1767,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
1767 | return rv; | 1767 | return rv; |
1768 | } | 1768 | } |
1769 | 1769 | ||
1770 | if (max_sector > mddev->resync_max) | ||
1771 | max_sector = mddev->resync_max; /* Don't do IO beyond here */ | ||
1770 | nr_sectors = 0; | 1772 | nr_sectors = 0; |
1771 | sync_blocks = 0; | 1773 | sync_blocks = 0; |
1772 | do { | 1774 | do { |
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index ba125277c6c4..d6f12882424d 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -1657,6 +1657,9 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
1657 | return (max_sector - sector_nr) + sectors_skipped; | 1657 | return (max_sector - sector_nr) + sectors_skipped; |
1658 | } | 1658 | } |
1659 | 1659 | ||
1660 | if (max_sector > mddev->resync_max) | ||
1661 | max_sector = mddev->resync_max; /* Don't do IO beyond here */ | ||
1662 | |||
1660 | /* make sure whole request will fit in a chunk - if chunks | 1663 | /* make sure whole request will fit in a chunk - if chunks |
1661 | * are meaningful | 1664 | * are meaningful |
1662 | */ | 1665 | */ |
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 388a974d63ef..e946de6f46bc 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -3698,6 +3698,25 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped | |||
3698 | release_stripe(sh); | 3698 | release_stripe(sh); |
3699 | first_sector += STRIPE_SECTORS; | 3699 | first_sector += STRIPE_SECTORS; |
3700 | } | 3700 | } |
3701 | /* If this takes us to the resync_max point where we have to pause, | ||
3702 | * then we need to write out the superblock. | ||
3703 | */ | ||
3704 | sector_nr += conf->chunk_size>>9; | ||
3705 | if (sector_nr >= mddev->resync_max) { | ||
3706 | /* Cannot proceed until we've updated the superblock... */ | ||
3707 | wait_event(conf->wait_for_overlap, | ||
3708 | atomic_read(&conf->reshape_stripes) == 0); | ||
3709 | mddev->reshape_position = conf->expand_progress; | ||
3710 | set_bit(MD_CHANGE_DEVS, &mddev->flags); | ||
3711 | md_wakeup_thread(mddev->thread); | ||
3712 | wait_event(mddev->sb_wait, | ||
3713 | !test_bit(MD_CHANGE_DEVS, &mddev->flags) | ||
3714 | || kthread_should_stop()); | ||
3715 | spin_lock_irq(&conf->device_lock); | ||
3716 | conf->expand_lo = mddev->reshape_position; | ||
3717 | spin_unlock_irq(&conf->device_lock); | ||
3718 | wake_up(&conf->wait_for_overlap); | ||
3719 | } | ||
3701 | return conf->chunk_size>>9; | 3720 | return conf->chunk_size>>9; |
3702 | } | 3721 | } |
3703 | 3722 | ||
@@ -3734,6 +3753,12 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski | |||
3734 | if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) | 3753 | if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) |
3735 | return reshape_request(mddev, sector_nr, skipped); | 3754 | return reshape_request(mddev, sector_nr, skipped); |
3736 | 3755 | ||
3756 | /* No need to check resync_max as we never do more than one | ||
3757 | * stripe, and as resync_max will always be on a chunk boundary, | ||
3758 | * if the check in md_do_sync didn't fire, there is no chance | ||
3759 | * of overstepping resync_max here | ||
3760 | */ | ||
3761 | |||
3737 | /* if there is too many failed drives and we are trying | 3762 | /* if there is too many failed drives and we are trying |
3738 | * to resync, then assert that we are finished, because there is | 3763 | * to resync, then assert that we are finished, because there is |
3739 | * nothing we can do. | 3764 | * nothing we can do. |
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index b579cc628303..c77dca3221ed 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h | |||
@@ -219,6 +219,8 @@ struct mddev_s | |||
219 | atomic_t recovery_active; /* blocks scheduled, but not written */ | 219 | atomic_t recovery_active; /* blocks scheduled, but not written */ |
220 | wait_queue_head_t recovery_wait; | 220 | wait_queue_head_t recovery_wait; |
221 | sector_t recovery_cp; | 221 | sector_t recovery_cp; |
222 | sector_t resync_max; /* resync should pause | ||
223 | * when it gets here */ | ||
222 | 224 | ||
223 | spinlock_t write_lock; | 225 | spinlock_t write_lock; |
224 | wait_queue_head_t sb_wait; /* for waiting on superblock updates */ | 226 | wait_queue_head_t sb_wait; /* for waiting on superblock updates */ |