aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: NeilBrown <neilb@suse.de> 2008-02-06 04:39:52 -0500
committer: Linus Torvalds <torvalds@woody.linux-foundation.org> 2008-02-06 13:41:18 -0500
commit: c620727779f7cc8ea96efb71f0651a26349e59c1 (patch)
tree: 777abdad9c9ef10cb4df5c0efc736e6c64851ed8
parent: c303da6d713b87b7b3f999f5acce8ecc76ff1adb (diff)
md: allow a maximum extent to be set for resyncing
This allows userspace to control resync/reshape progress and synchronise it with other activities, such as shared access in a SAN, or backing up critical sections during a tricky reshape.

Writing a number of sectors (which must be a multiple of the chunk size if such is meaningful) causes a resync to pause when it gets to that point.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- Documentation/md.txt 10
-rw-r--r-- drivers/md/md.c 75
-rw-r--r-- drivers/md/raid1.c 2
-rw-r--r-- drivers/md/raid10.c 3
-rw-r--r-- drivers/md/raid5.c 25
-rw-r--r-- include/linux/raid/md_k.h 2
6 files changed, 107 insertions, 10 deletions
diff --git a/Documentation/md.txt b/Documentation/md.txt
index 5818628207b5..396cdd982c26 100644
--- a/Documentation/md.txt
+++ b/Documentation/md.txt
@@ -416,6 +416,16 @@ also have
416 sectors in total that could need to be processed. The two 416 sectors in total that could need to be processed. The two
417 numbers are separated by a '/' thus effectively showing one 417 numbers are separated by a '/' thus effectively showing one
418 value, a fraction of the process that is complete. 418 value, a fraction of the process that is complete.
419 A 'select' on this attribute will return when resync completes,
420 when it reaches the current sync_max (below) and possibly at
421 other times.
422
423 sync_max
424 This is a number of sectors at which point a resync/recovery
425 process will pause. When a resync is active, the value can
426 only ever be increased, never decreased. The value of 'max'
427 effectively disables the limit.
428
419 429
420 sync_speed 430 sync_speed
421 This shows the current actual speed, in K/sec, of the current 431 This shows the current actual speed, in K/sec, of the current
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 00788c56276f..79eb63fdb4b3 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -275,6 +275,7 @@ static mddev_t * mddev_find(dev_t unit)
275 spin_lock_init(&new->write_lock); 275 spin_lock_init(&new->write_lock);
276 init_waitqueue_head(&new->sb_wait); 276 init_waitqueue_head(&new->sb_wait);
277 new->reshape_position = MaxSector; 277 new->reshape_position = MaxSector;
278 new->resync_max = MaxSector;
278 279
279 new->queue = blk_alloc_queue(GFP_KERNEL); 280 new->queue = blk_alloc_queue(GFP_KERNEL);
280 if (!new->queue) { 281 if (!new->queue) {
@@ -2921,6 +2922,43 @@ sync_completed_show(mddev_t *mddev, char *page)
2921static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed); 2922static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed);
2922 2923
2923static ssize_t 2924static ssize_t
2925max_sync_show(mddev_t *mddev, char *page)
2926{
2927 if (mddev->resync_max == MaxSector)
2928 return sprintf(page, "max\n");
2929 else
2930 return sprintf(page, "%llu\n",
2931 (unsigned long long)mddev->resync_max);
2932}
2933static ssize_t
2934max_sync_store(mddev_t *mddev, const char *buf, size_t len)
2935{
2936 if (strncmp(buf, "max", 3) == 0)
2937 mddev->resync_max = MaxSector;
2938 else {
2939 char *ep;
2940 unsigned long long max = simple_strtoull(buf, &ep, 10);
2941 if (ep == buf || (*ep != 0 && *ep != '\n'))
2942 return -EINVAL;
2943 if (max < mddev->resync_max &&
2944 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
2945 return -EBUSY;
2946
2947 /* Must be a multiple of chunk_size */
2948 if (mddev->chunk_size) {
2949 if (max & (sector_t)((mddev->chunk_size>>9)-1))
2950 return -EINVAL;
2951 }
2952 mddev->resync_max = max;
2953 }
2954 wake_up(&mddev->recovery_wait);
2955 return len;
2956}
2957
2958static struct md_sysfs_entry md_max_sync =
2959__ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store);
2960
2961static ssize_t
2924suspend_lo_show(mddev_t *mddev, char *page) 2962suspend_lo_show(mddev_t *mddev, char *page)
2925{ 2963{
2926 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo); 2964 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
@@ -3030,6 +3068,7 @@ static struct attribute *md_redundancy_attrs[] = {
3030 &md_sync_max.attr, 3068 &md_sync_max.attr,
3031 &md_sync_speed.attr, 3069 &md_sync_speed.attr,
3032 &md_sync_completed.attr, 3070 &md_sync_completed.attr,
3071 &md_max_sync.attr,
3033 &md_suspend_lo.attr, 3072 &md_suspend_lo.attr,
3034 &md_suspend_hi.attr, 3073 &md_suspend_hi.attr,
3035 &md_bitmap.attr, 3074 &md_bitmap.attr,
@@ -3579,6 +3618,7 @@ static int do_md_stop(mddev_t * mddev, int mode)
3579 mddev->size = 0; 3618 mddev->size = 0;
3580 mddev->raid_disks = 0; 3619 mddev->raid_disks = 0;
3581 mddev->recovery_cp = 0; 3620 mddev->recovery_cp = 0;
3621 mddev->resync_max = MaxSector;
3582 mddev->reshape_position = MaxSector; 3622 mddev->reshape_position = MaxSector;
3583 mddev->external = 0; 3623 mddev->external = 0;
3584 3624
@@ -5443,8 +5483,16 @@ void md_do_sync(mddev_t *mddev)
5443 sector_t sectors; 5483 sector_t sectors;
5444 5484
5445 skipped = 0; 5485 skipped = 0;
5486 if (j >= mddev->resync_max) {
5487 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
5488 wait_event(mddev->recovery_wait,
5489 mddev->resync_max > j
5490 || kthread_should_stop());
5491 }
5492 if (kthread_should_stop())
5493 goto interrupted;
5446 sectors = mddev->pers->sync_request(mddev, j, &skipped, 5494 sectors = mddev->pers->sync_request(mddev, j, &skipped,
5447 currspeed < speed_min(mddev)); 5495 currspeed < speed_min(mddev));
5448 if (sectors == 0) { 5496 if (sectors == 0) {
5449 set_bit(MD_RECOVERY_ERR, &mddev->recovery); 5497 set_bit(MD_RECOVERY_ERR, &mddev->recovery);
5450 goto out; 5498 goto out;
@@ -5486,15 +5534,9 @@ void md_do_sync(mddev_t *mddev)
5486 } 5534 }
5487 5535
5488 5536
5489 if (kthread_should_stop()) { 5537 if (kthread_should_stop())
5490 /* 5538 goto interrupted;
5491 * got a signal, exit. 5539
5492 */
5493 printk(KERN_INFO
5494 "md: md_do_sync() got signal ... exiting\n");
5495 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5496 goto out;
5497 }
5498 5540
5499 /* 5541 /*
5500 * this loop exits only if either when we are slower than 5542 * this loop exits only if either when we are slower than
@@ -5558,9 +5600,22 @@ void md_do_sync(mddev_t *mddev)
5558 5600
5559 skip: 5601 skip:
5560 mddev->curr_resync = 0; 5602 mddev->curr_resync = 0;
5603 mddev->resync_max = MaxSector;
5604 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
5561 wake_up(&resync_wait); 5605 wake_up(&resync_wait);
5562 set_bit(MD_RECOVERY_DONE, &mddev->recovery); 5606 set_bit(MD_RECOVERY_DONE, &mddev->recovery);
5563 md_wakeup_thread(mddev->thread); 5607 md_wakeup_thread(mddev->thread);
5608 return;
5609
5610 interrupted:
5611 /*
5612 * got a signal, exit.
5613 */
5614 printk(KERN_INFO
5615 "md: md_do_sync() got signal ... exiting\n");
5616 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5617 goto out;
5618
5564} 5619}
5565EXPORT_SYMBOL_GPL(md_do_sync); 5620EXPORT_SYMBOL_GPL(md_do_sync);
5566 5621
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index e0b8d0dd7a87..ae7c15207df5 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1767,6 +1767,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
1767 return rv; 1767 return rv;
1768 } 1768 }
1769 1769
1770 if (max_sector > mddev->resync_max)
1771 max_sector = mddev->resync_max; /* Don't do IO beyond here */
1770 nr_sectors = 0; 1772 nr_sectors = 0;
1771 sync_blocks = 0; 1773 sync_blocks = 0;
1772 do { 1774 do {
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index ba125277c6c4..d6f12882424d 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1657,6 +1657,9 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
1657 return (max_sector - sector_nr) + sectors_skipped; 1657 return (max_sector - sector_nr) + sectors_skipped;
1658 } 1658 }
1659 1659
1660 if (max_sector > mddev->resync_max)
1661 max_sector = mddev->resync_max; /* Don't do IO beyond here */
1662
1660 /* make sure whole request will fit in a chunk - if chunks 1663 /* make sure whole request will fit in a chunk - if chunks
1661 * are meaningful 1664 * are meaningful
1662 */ 1665 */
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 388a974d63ef..e946de6f46bc 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3698,6 +3698,25 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
3698 release_stripe(sh); 3698 release_stripe(sh);
3699 first_sector += STRIPE_SECTORS; 3699 first_sector += STRIPE_SECTORS;
3700 } 3700 }
3701 /* If this takes us to the resync_max point where we have to pause,
3702 * then we need to write out the superblock.
3703 */
3704 sector_nr += conf->chunk_size>>9;
3705 if (sector_nr >= mddev->resync_max) {
3706 /* Cannot proceed until we've updated the superblock... */
3707 wait_event(conf->wait_for_overlap,
3708 atomic_read(&conf->reshape_stripes) == 0);
3709 mddev->reshape_position = conf->expand_progress;
3710 set_bit(MD_CHANGE_DEVS, &mddev->flags);
3711 md_wakeup_thread(mddev->thread);
3712 wait_event(mddev->sb_wait,
3713 !test_bit(MD_CHANGE_DEVS, &mddev->flags)
3714 || kthread_should_stop());
3715 spin_lock_irq(&conf->device_lock);
3716 conf->expand_lo = mddev->reshape_position;
3717 spin_unlock_irq(&conf->device_lock);
3718 wake_up(&conf->wait_for_overlap);
3719 }
3701 return conf->chunk_size>>9; 3720 return conf->chunk_size>>9;
3702} 3721}
3703 3722
@@ -3734,6 +3753,12 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
3734 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) 3753 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
3735 return reshape_request(mddev, sector_nr, skipped); 3754 return reshape_request(mddev, sector_nr, skipped);
3736 3755
3756 /* No need to check resync_max as we never do more than one
3757 * stripe, and as resync_max will always be on a chunk boundary,
3758 * if the check in md_do_sync didn't fire, there is no chance
3759 * of overstepping resync_max here
3760 */
3761
3737 /* if there is too many failed drives and we are trying 3762 /* if there is too many failed drives and we are trying
3738 * to resync, then assert that we are finished, because there is 3763 * to resync, then assert that we are finished, because there is
3739 * nothing we can do. 3764 * nothing we can do.
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index b579cc628303..c77dca3221ed 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -219,6 +219,8 @@ struct mddev_s
219 atomic_t recovery_active; /* blocks scheduled, but not written */ 219 atomic_t recovery_active; /* blocks scheduled, but not written */
220 wait_queue_head_t recovery_wait; 220 wait_queue_head_t recovery_wait;
221 sector_t recovery_cp; 221 sector_t recovery_cp;
222 sector_t resync_max; /* resync should pause
223 * when it gets here */
222 224
223 spinlock_t write_lock; 225 spinlock_t write_lock;
224 wait_queue_head_t sb_wait; /* for waiting on superblock updates */ 226 wait_queue_head_t sb_wait; /* for waiting on superblock updates */