aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2008-08-15 12:30:24 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2008-08-15 12:30:24 -0400
commit 3141eb6c50f1dafa99874e702d8b444034e2bb10 (patch)
tree 7eb110985d6487b90712bf84c4ade67be3783909
parent 7db9cbb3748c46b80e5c99ffa91945b8dd4ed5e5 (diff)
parent 56ac36d722d0d27c03599d1245ac0ab59e474e5c (diff)
Merge branch 'for-linus' of git://neil.brown.name/md
* 'for-linus' of git://neil.brown.name/md:
  md: cancel check/repair requests when recovery is needed
  Allow raid10 resync to happening in larger chunks.
  Allow faulty devices to be removed from a readonly array.
  Don't let a blocked_rdev interfere with read request in raid5/6
  Fail safely when trying to grow an array with a write-intent bitmap.
  Restore force switch of md array to readonly at reboot time.
  Make writes to md/safe_mode_delay immediately effective.
-rw-r--r-- drivers/md/md.c 33
-rw-r--r-- drivers/md/raid10.c 9
-rw-r--r-- drivers/md/raid5.c 32
3 files changed, 59 insertions, 15 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index c7aae66c6f9b..8cfadc5bd2ba 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2393,6 +2393,8 @@ static void analyze_sbs(mddev_t * mddev)
2393 2393
2394} 2394}
2395 2395
2396static void md_safemode_timeout(unsigned long data);
2397
2396static ssize_t 2398static ssize_t
2397safe_delay_show(mddev_t *mddev, char *page) 2399safe_delay_show(mddev_t *mddev, char *page)
2398{ 2400{
@@ -2432,9 +2434,12 @@ safe_delay_store(mddev_t *mddev, const char *cbuf, size_t len)
2432 if (msec == 0) 2434 if (msec == 0)
2433 mddev->safemode_delay = 0; 2435 mddev->safemode_delay = 0;
2434 else { 2436 else {
2437 unsigned long old_delay = mddev->safemode_delay;
2435 mddev->safemode_delay = (msec*HZ)/1000; 2438 mddev->safemode_delay = (msec*HZ)/1000;
2436 if (mddev->safemode_delay == 0) 2439 if (mddev->safemode_delay == 0)
2437 mddev->safemode_delay = 1; 2440 mddev->safemode_delay = 1;
2441 if (mddev->safemode_delay < old_delay)
2442 md_safemode_timeout((unsigned long)mddev);
2438 } 2443 }
2439 return len; 2444 return len;
2440} 2445}
@@ -4634,6 +4639,11 @@ static int update_size(mddev_t *mddev, sector_t num_sectors)
4634 */ 4639 */
4635 if (mddev->sync_thread) 4640 if (mddev->sync_thread)
4636 return -EBUSY; 4641 return -EBUSY;
4642 if (mddev->bitmap)
4643 /* Sorry, cannot grow a bitmap yet, just remove it,
4644 * grow, and re-add.
4645 */
4646 return -EBUSY;
4637 rdev_for_each(rdev, tmp, mddev) { 4647 rdev_for_each(rdev, tmp, mddev) {
4638 sector_t avail; 4648 sector_t avail;
4639 avail = rdev->size * 2; 4649 avail = rdev->size * 2;
@@ -5993,7 +6003,7 @@ static int remove_and_add_spares(mddev_t *mddev)
5993 } 6003 }
5994 } 6004 }
5995 6005
5996 if (mddev->degraded) { 6006 if (mddev->degraded && ! mddev->ro) {
5997 rdev_for_each(rdev, rtmp, mddev) { 6007 rdev_for_each(rdev, rtmp, mddev) {
5998 if (rdev->raid_disk >= 0 && 6008 if (rdev->raid_disk >= 0 &&
5999 !test_bit(In_sync, &rdev->flags) && 6009 !test_bit(In_sync, &rdev->flags) &&
@@ -6067,6 +6077,8 @@ void md_check_recovery(mddev_t *mddev)
6067 flush_signals(current); 6077 flush_signals(current);
6068 } 6078 }
6069 6079
6080 if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
6081 return;
6070 if ( ! ( 6082 if ( ! (
6071 (mddev->flags && !mddev->external) || 6083 (mddev->flags && !mddev->external) ||
6072 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) || 6084 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
@@ -6080,6 +6092,15 @@ void md_check_recovery(mddev_t *mddev)
6080 if (mddev_trylock(mddev)) { 6092 if (mddev_trylock(mddev)) {
6081 int spares = 0; 6093 int spares = 0;
6082 6094
6095 if (mddev->ro) {
6096 /* Only thing we do on a ro array is remove
6097 * failed devices.
6098 */
6099 remove_and_add_spares(mddev);
6100 clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
6101 goto unlock;
6102 }
6103
6083 if (!mddev->external) { 6104 if (!mddev->external) {
6084 int did_change = 0; 6105 int did_change = 0;
6085 spin_lock_irq(&mddev->write_lock); 6106 spin_lock_irq(&mddev->write_lock);
@@ -6117,7 +6138,8 @@ void md_check_recovery(mddev_t *mddev)
6117 /* resync has finished, collect result */ 6138 /* resync has finished, collect result */
6118 md_unregister_thread(mddev->sync_thread); 6139 md_unregister_thread(mddev->sync_thread);
6119 mddev->sync_thread = NULL; 6140 mddev->sync_thread = NULL;
6120 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { 6141 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
6142 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
6121 /* success...*/ 6143 /* success...*/
6122 /* activate any spares */ 6144 /* activate any spares */
6123 if (mddev->pers->spare_active(mddev)) 6145 if (mddev->pers->spare_active(mddev))
@@ -6169,6 +6191,7 @@ void md_check_recovery(mddev_t *mddev)
6169 } else if ((spares = remove_and_add_spares(mddev))) { 6191 } else if ((spares = remove_and_add_spares(mddev))) {
6170 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); 6192 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
6171 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); 6193 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
6194 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
6172 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery); 6195 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
6173 } else if (mddev->recovery_cp < MaxSector) { 6196 } else if (mddev->recovery_cp < MaxSector) {
6174 set_bit(MD_RECOVERY_SYNC, &mddev->recovery); 6197 set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
@@ -6232,7 +6255,11 @@ static int md_notify_reboot(struct notifier_block *this,
6232 6255
6233 for_each_mddev(mddev, tmp) 6256 for_each_mddev(mddev, tmp)
6234 if (mddev_trylock(mddev)) { 6257 if (mddev_trylock(mddev)) {
6235 do_md_stop (mddev, 1, 0); 6258 /* Force a switch to readonly even array
6259 * appears to still be in use. Hence
6260 * the '100'.
6261 */
6262 do_md_stop (mddev, 1, 100);
6236 mddev_unlock(mddev); 6263 mddev_unlock(mddev);
6237 } 6264 }
6238 /* 6265 /*
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index d41bebb6da0f..e34cd0e62473 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -76,11 +76,13 @@ static void r10bio_pool_free(void *r10_bio, void *data)
76 kfree(r10_bio); 76 kfree(r10_bio);
77} 77}
78 78
79/* Maximum size of each resync request */
79#define RESYNC_BLOCK_SIZE (64*1024) 80#define RESYNC_BLOCK_SIZE (64*1024)
80//#define RESYNC_BLOCK_SIZE PAGE_SIZE
81#define RESYNC_SECTORS (RESYNC_BLOCK_SIZE >> 9)
82#define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE) 81#define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE)
83#define RESYNC_WINDOW (2048*1024) 82/* amount of memory to reserve for resync requests */
83#define RESYNC_WINDOW (1024*1024)
84/* maximum number of concurrent requests, memory permitting */
85#define RESYNC_DEPTH (32*1024*1024/RESYNC_BLOCK_SIZE)
84 86
85/* 87/*
86 * When performing a resync, we need to read and compare, so 88 * When performing a resync, we need to read and compare, so
@@ -690,7 +692,6 @@ static int flush_pending_writes(conf_t *conf)
690 * there is no normal IO happeing. It must arrange to call 692 * there is no normal IO happeing. It must arrange to call
691 * lower_barrier when the particular background IO completes. 693 * lower_barrier when the particular background IO completes.
692 */ 694 */
693#define RESYNC_DEPTH 32
694 695
695static void raise_barrier(conf_t *conf, int force) 696static void raise_barrier(conf_t *conf, int force)
696{ 697{
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 40e939675657..224de022e7c5 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2568,10 +2568,10 @@ static bool handle_stripe5(struct stripe_head *sh)
2568 if (dev->written) 2568 if (dev->written)
2569 s.written++; 2569 s.written++;
2570 rdev = rcu_dereference(conf->disks[i].rdev); 2570 rdev = rcu_dereference(conf->disks[i].rdev);
2571 if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) { 2571 if (blocked_rdev == NULL &&
2572 rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
2572 blocked_rdev = rdev; 2573 blocked_rdev = rdev;
2573 atomic_inc(&rdev->nr_pending); 2574 atomic_inc(&rdev->nr_pending);
2574 break;
2575 } 2575 }
2576 if (!rdev || !test_bit(In_sync, &rdev->flags)) { 2576 if (!rdev || !test_bit(In_sync, &rdev->flags)) {
2577 /* The ReadError flag will just be confusing now */ 2577 /* The ReadError flag will just be confusing now */
@@ -2588,8 +2588,14 @@ static bool handle_stripe5(struct stripe_head *sh)
2588 rcu_read_unlock(); 2588 rcu_read_unlock();
2589 2589
2590 if (unlikely(blocked_rdev)) { 2590 if (unlikely(blocked_rdev)) {
2591 set_bit(STRIPE_HANDLE, &sh->state); 2591 if (s.syncing || s.expanding || s.expanded ||
2592 goto unlock; 2592 s.to_write || s.written) {
2593 set_bit(STRIPE_HANDLE, &sh->state);
2594 goto unlock;
2595 }
2596 /* There is nothing for the blocked_rdev to block */
2597 rdev_dec_pending(blocked_rdev, conf->mddev);
2598 blocked_rdev = NULL;
2593 } 2599 }
2594 2600
2595 if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) { 2601 if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) {
@@ -2832,10 +2838,10 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
2832 if (dev->written) 2838 if (dev->written)
2833 s.written++; 2839 s.written++;
2834 rdev = rcu_dereference(conf->disks[i].rdev); 2840 rdev = rcu_dereference(conf->disks[i].rdev);
2835 if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) { 2841 if (blocked_rdev == NULL &&
2842 rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
2836 blocked_rdev = rdev; 2843 blocked_rdev = rdev;
2837 atomic_inc(&rdev->nr_pending); 2844 atomic_inc(&rdev->nr_pending);
2838 break;
2839 } 2845 }
2840 if (!rdev || !test_bit(In_sync, &rdev->flags)) { 2846 if (!rdev || !test_bit(In_sync, &rdev->flags)) {
2841 /* The ReadError flag will just be confusing now */ 2847 /* The ReadError flag will just be confusing now */
@@ -2853,9 +2859,16 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
2853 rcu_read_unlock(); 2859 rcu_read_unlock();
2854 2860
2855 if (unlikely(blocked_rdev)) { 2861 if (unlikely(blocked_rdev)) {
2856 set_bit(STRIPE_HANDLE, &sh->state); 2862 if (s.syncing || s.expanding || s.expanded ||
2857 goto unlock; 2863 s.to_write || s.written) {
2864 set_bit(STRIPE_HANDLE, &sh->state);
2865 goto unlock;
2866 }
2867 /* There is nothing for the blocked_rdev to block */
2868 rdev_dec_pending(blocked_rdev, conf->mddev);
2869 blocked_rdev = NULL;
2858 } 2870 }
2871
2859 pr_debug("locked=%d uptodate=%d to_read=%d" 2872 pr_debug("locked=%d uptodate=%d to_read=%d"
2860 " to_write=%d failed=%d failed_num=%d,%d\n", 2873 " to_write=%d failed=%d failed_num=%d,%d\n",
2861 s.locked, s.uptodate, s.to_read, s.to_write, s.failed, 2874 s.locked, s.uptodate, s.to_read, s.to_write, s.failed,
@@ -4446,6 +4459,9 @@ static int raid5_check_reshape(mddev_t *mddev)
4446 return -EINVAL; /* Cannot shrink array or change level yet */ 4459 return -EINVAL; /* Cannot shrink array or change level yet */
4447 if (mddev->delta_disks == 0) 4460 if (mddev->delta_disks == 0)
4448 return 0; /* nothing to do */ 4461 return 0; /* nothing to do */
4462 if (mddev->bitmap)
4463 /* Cannot grow a bitmap yet */
4464 return -EBUSY;
4449 4465
4450 /* Can only proceed if there are plenty of stripe_heads. 4466 /* Can only proceed if there are plenty of stripe_heads.
4451 * We need a minimum of one full stripe,, and for sensible progress 4467 * We need a minimum of one full stripe,, and for sensible progress