aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.de>2006-03-27 04:18:14 -0500
committerLinus Torvalds <torvalds@g5.osdl.org>2006-03-27 11:45:02 -0500
commite464eafdb4400c6d6576ba3840d8bd40340f8a96 (patch)
tree0c3f4003c883264ee08300c02007f06e4d1ebb91
parent16484bf59634e25d1299761e5ed8bacf22bc6368 (diff)
[PATCH] md: Support suspending of IO to regions of an md array
Suspending IO to a region of an md array allows user-space to access the data in that region safely. This is needed for raid5 reshape, as user-space needs to take a backup of the first few stripes before allowing the reshape to commence. It will also be useful in cluster-aware raid1 configurations, so that all cluster members can leave a section of the array untouched while a resync/recovery happens.

The 'start' and 'end' of the suspended range are written to two sysfs attributes, suspend_lo and suspend_hi. Note that only one range can be suspended at a time.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--drivers/md/md.c59
-rw-r--r--drivers/md/raid5.c14
-rw-r--r--include/linux/raid/md_k.h4
3 files changed, 77 insertions, 0 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index a79dd33d343d..92fd0104fa04 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2365,6 +2365,63 @@ sync_completed_show(mddev_t *mddev, char *page)
2365static struct md_sysfs_entry 2365static struct md_sysfs_entry
2366md_sync_completed = __ATTR_RO(sync_completed); 2366md_sync_completed = __ATTR_RO(sync_completed);
2367 2367
2368static ssize_t
2369suspend_lo_show(mddev_t *mddev, char *page)
2370{
2371 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
2372}
2373
2374static ssize_t
2375suspend_lo_store(mddev_t *mddev, const char *buf, size_t len)
2376{
2377 char *e;
2378 unsigned long long new = simple_strtoull(buf, &e, 10);
2379
2380 if (mddev->pers->quiesce == NULL)
2381 return -EINVAL;
2382 if (buf == e || (*e && *e != '\n'))
2383 return -EINVAL;
2384 if (new >= mddev->suspend_hi ||
2385 (new > mddev->suspend_lo && new < mddev->suspend_hi)) {
2386 mddev->suspend_lo = new;
2387 mddev->pers->quiesce(mddev, 2);
2388 return len;
2389 } else
2390 return -EINVAL;
2391}
2392static struct md_sysfs_entry md_suspend_lo =
2393__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
2394
2395
2396static ssize_t
2397suspend_hi_show(mddev_t *mddev, char *page)
2398{
2399 return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi);
2400}
2401
2402static ssize_t
2403suspend_hi_store(mddev_t *mddev, const char *buf, size_t len)
2404{
2405 char *e;
2406 unsigned long long new = simple_strtoull(buf, &e, 10);
2407
2408 if (mddev->pers->quiesce == NULL)
2409 return -EINVAL;
2410 if (buf == e || (*e && *e != '\n'))
2411 return -EINVAL;
2412 if ((new <= mddev->suspend_lo && mddev->suspend_lo >= mddev->suspend_hi) ||
2413 (new > mddev->suspend_lo && new > mddev->suspend_hi)) {
2414 mddev->suspend_hi = new;
2415 mddev->pers->quiesce(mddev, 1);
2416 mddev->pers->quiesce(mddev, 0);
2417 return len;
2418 } else
2419 return -EINVAL;
2420}
2421static struct md_sysfs_entry md_suspend_hi =
2422__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
2423
2424
2368static struct attribute *md_default_attrs[] = { 2425static struct attribute *md_default_attrs[] = {
2369 &md_level.attr, 2426 &md_level.attr,
2370 &md_raid_disks.attr, 2427 &md_raid_disks.attr,
@@ -2382,6 +2439,8 @@ static struct attribute *md_redundancy_attrs[] = {
2382 &md_sync_max.attr, 2439 &md_sync_max.attr,
2383 &md_sync_speed.attr, 2440 &md_sync_speed.attr,
2384 &md_sync_completed.attr, 2441 &md_sync_completed.attr,
2442 &md_suspend_lo.attr,
2443 &md_suspend_hi.attr,
2385 NULL, 2444 NULL,
2386}; 2445};
2387static struct attribute_group md_redundancy_group = { 2446static struct attribute_group md_redundancy_group = {
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 355dafb98aac..bb16ac231a40 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1805,6 +1805,15 @@ static int make_request(request_queue_t *q, struct bio * bi)
1805 goto retry; 1805 goto retry;
1806 } 1806 }
1807 } 1807 }
1808 /* FIXME what if we get a false positive because these
1809 * are being updated.
1810 */
1811 if (logical_sector >= mddev->suspend_lo &&
1812 logical_sector < mddev->suspend_hi) {
1813 release_stripe(sh);
1814 schedule();
1815 goto retry;
1816 }
1808 1817
1809 if (test_bit(STRIPE_EXPANDING, &sh->state) || 1818 if (test_bit(STRIPE_EXPANDING, &sh->state) ||
1810 !add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK))) { 1819 !add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK))) {
@@ -2725,6 +2734,10 @@ static void raid5_quiesce(mddev_t *mddev, int state)
2725 raid5_conf_t *conf = mddev_to_conf(mddev); 2734 raid5_conf_t *conf = mddev_to_conf(mddev);
2726 2735
2727 switch(state) { 2736 switch(state) {
2737 case 2: /* resume for a suspend */
2738 wake_up(&conf->wait_for_overlap);
2739 break;
2740
2728 case 1: /* stop all writes */ 2741 case 1: /* stop all writes */
2729 spin_lock_irq(&conf->device_lock); 2742 spin_lock_irq(&conf->device_lock);
2730 conf->quiesce = 1; 2743 conf->quiesce = 1;
@@ -2738,6 +2751,7 @@ static void raid5_quiesce(mddev_t *mddev, int state)
2738 spin_lock_irq(&conf->device_lock); 2751 spin_lock_irq(&conf->device_lock);
2739 conf->quiesce = 0; 2752 conf->quiesce = 0;
2740 wake_up(&conf->wait_for_stripe); 2753 wake_up(&conf->wait_for_stripe);
2754 wake_up(&conf->wait_for_overlap);
2741 spin_unlock_irq(&conf->device_lock); 2755 spin_unlock_irq(&conf->device_lock);
2742 break; 2756 break;
2743 } 2757 }
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index 002ee631fabb..c0d3097846a7 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -151,6 +151,10 @@ struct mddev_s
151 sector_t resync_mismatches; /* count of sectors where 151 sector_t resync_mismatches; /* count of sectors where
152 * parity/replica mismatch found 152 * parity/replica mismatch found
153 */ 153 */
154
155 /* allow user-space to request suspension of IO to regions of the array */
156 sector_t suspend_lo;
157 sector_t suspend_hi;
154 /* if zero, use the system-wide default */ 158 /* if zero, use the system-wide default */
155 int sync_speed_min; 159 int sync_speed_min;
156 int sync_speed_max; 160 int sync_speed_max;