aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author NeilBrown <neilb@suse.de> 2013-11-18 20:02:01 -0500
committer NeilBrown <neilb@suse.de> 2013-11-18 23:19:17 -0500
commit c91abf5a3546a4ff0838d2905f4d7eae2795f724 (patch)
tree 39b3735cebfa73ff370f2738bd8578a113fe4215
parent 29f097c4d968021ee4fad1b033be5825ff78330e (diff)
md: use MD_RECOVERY_INTR instead of kthread_should_stop in resync thread.
We currently use kthread_should_stop() in various places in the sync/reshape code to abort early. However some places set MD_RECOVERY_INTR but don't immediately call md_reap_sync_thread() (and we will shortly get another one). When this happens we are relying on md_check_recovery() to reap the thread, and that only happens when it finishes normally. So MD_RECOVERY_INTR must lead to a normal finish without the kthread_should_stop() test. So replace all relevant tests, and be more careful when the thread is interrupted not to acknowledge the latest step in a reshape, as it may not be fully committed yet. Also add a test on MD_RECOVERY_INTR in the 'is_mddev_idle' loop so we don't have to wait for the speed to drop before we can abort. Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r-- drivers/md/md.c 40
-rw-r--r-- drivers/md/raid10.c 6
-rw-r--r-- drivers/md/raid5.c 19
3 files changed, 34 insertions, 31 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 1ca47b0f4779..a74045df7bab 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -7410,9 +7410,6 @@ void md_do_sync(struct md_thread *thread)
7410 mddev->curr_resync = 2; 7410 mddev->curr_resync = 2;
7411 7411
7412 try_again: 7412 try_again:
7413 if (kthread_should_stop())
7414 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7415
7416 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) 7413 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
7417 goto skip; 7414 goto skip;
7418 for_each_mddev(mddev2, tmp) { 7415 for_each_mddev(mddev2, tmp) {
@@ -7437,7 +7434,7 @@ void md_do_sync(struct md_thread *thread)
7437 * be caught by 'softlockup' 7434 * be caught by 'softlockup'
7438 */ 7435 */
7439 prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE); 7436 prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
7440 if (!kthread_should_stop() && 7437 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
7441 mddev2->curr_resync >= mddev->curr_resync) { 7438 mddev2->curr_resync >= mddev->curr_resync) {
7442 printk(KERN_INFO "md: delaying %s of %s" 7439 printk(KERN_INFO "md: delaying %s of %s"
7443 " until %s has finished (they" 7440 " until %s has finished (they"
@@ -7513,7 +7510,7 @@ void md_do_sync(struct md_thread *thread)
7513 last_check = 0; 7510 last_check = 0;
7514 7511
7515 if (j>2) { 7512 if (j>2) {
7516 printk(KERN_INFO 7513 printk(KERN_INFO
7517 "md: resuming %s of %s from checkpoint.\n", 7514 "md: resuming %s of %s from checkpoint.\n",
7518 desc, mdname(mddev)); 7515 desc, mdname(mddev));
7519 mddev->curr_resync = j; 7516 mddev->curr_resync = j;
@@ -7550,7 +7547,8 @@ void md_do_sync(struct md_thread *thread)
7550 sysfs_notify(&mddev->kobj, NULL, "sync_completed"); 7547 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
7551 } 7548 }
7552 7549
7553 while (j >= mddev->resync_max && !kthread_should_stop()) { 7550 while (j >= mddev->resync_max &&
7551 !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
7554 /* As this condition is controlled by user-space, 7552 /* As this condition is controlled by user-space,
7555 * we can block indefinitely, so use '_interruptible' 7553 * we can block indefinitely, so use '_interruptible'
7556 * to avoid triggering warnings. 7554 * to avoid triggering warnings.
@@ -7558,17 +7556,18 @@ void md_do_sync(struct md_thread *thread)
7558 flush_signals(current); /* just in case */ 7556 flush_signals(current); /* just in case */
7559 wait_event_interruptible(mddev->recovery_wait, 7557 wait_event_interruptible(mddev->recovery_wait,
7560 mddev->resync_max > j 7558 mddev->resync_max > j
7561 || kthread_should_stop()); 7559 || test_bit(MD_RECOVERY_INTR,
7560 &mddev->recovery));
7562 } 7561 }
7563 7562
7564 if (kthread_should_stop()) 7563 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
7565 goto interrupted; 7564 break;
7566 7565
7567 sectors = mddev->pers->sync_request(mddev, j, &skipped, 7566 sectors = mddev->pers->sync_request(mddev, j, &skipped,
7568 currspeed < speed_min(mddev)); 7567 currspeed < speed_min(mddev));
7569 if (sectors == 0) { 7568 if (sectors == 0) {
7570 set_bit(MD_RECOVERY_INTR, &mddev->recovery); 7569 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7571 goto out; 7570 break;
7572 } 7571 }
7573 7572
7574 if (!skipped) { /* actual IO requested */ 7573 if (!skipped) { /* actual IO requested */
@@ -7605,10 +7604,8 @@ void md_do_sync(struct md_thread *thread)
7605 last_mark = next; 7604 last_mark = next;
7606 } 7605 }
7607 7606
7608 7607 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
7609 if (kthread_should_stop()) 7608 break;
7610 goto interrupted;
7611
7612 7609
7613 /* 7610 /*
7614 * this loop exits only if either when we are slower than 7611 * this loop exits only if either when we are slower than
@@ -7631,11 +7628,12 @@ void md_do_sync(struct md_thread *thread)
7631 } 7628 }
7632 } 7629 }
7633 } 7630 }
7634 printk(KERN_INFO "md: %s: %s done.\n",mdname(mddev), desc); 7631 printk(KERN_INFO "md: %s: %s %s.\n",mdname(mddev), desc,
7632 test_bit(MD_RECOVERY_INTR, &mddev->recovery)
7633 ? "interrupted" : "done");
7635 /* 7634 /*
7636 * this also signals 'finished resyncing' to md_stop 7635 * this also signals 'finished resyncing' to md_stop
7637 */ 7636 */
7638 out:
7639 blk_finish_plug(&plug); 7637 blk_finish_plug(&plug);
7640 wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active)); 7638 wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
7641 7639
@@ -7689,16 +7687,6 @@ void md_do_sync(struct md_thread *thread)
7689 set_bit(MD_RECOVERY_DONE, &mddev->recovery); 7687 set_bit(MD_RECOVERY_DONE, &mddev->recovery);
7690 md_wakeup_thread(mddev->thread); 7688 md_wakeup_thread(mddev->thread);
7691 return; 7689 return;
7692
7693 interrupted:
7694 /*
7695 * got a signal, exit.
7696 */
7697 printk(KERN_INFO
7698 "md: md_do_sync() got signal ... exiting\n");
7699 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
7700 goto out;
7701
7702} 7690}
7703EXPORT_SYMBOL_GPL(md_do_sync); 7691EXPORT_SYMBOL_GPL(md_do_sync);
7704 7692
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 73dc8a377522..4f87ba5f3a66 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -4386,7 +4386,11 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
4386 set_bit(MD_CHANGE_DEVS, &mddev->flags); 4386 set_bit(MD_CHANGE_DEVS, &mddev->flags);
4387 md_wakeup_thread(mddev->thread); 4387 md_wakeup_thread(mddev->thread);
4388 wait_event(mddev->sb_wait, mddev->flags == 0 || 4388 wait_event(mddev->sb_wait, mddev->flags == 0 ||
4389 kthread_should_stop()); 4389 test_bit(MD_RECOVERY_INTR, &mddev->recovery));
4390 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
4391 allow_barrier(conf);
4392 return sectors_done;
4393 }
4390 conf->reshape_safe = mddev->reshape_position; 4394 conf->reshape_safe = mddev->reshape_position;
4391 allow_barrier(conf); 4395 allow_barrier(conf);
4392 } 4396 }
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 2c8907706109..02f6bc2ac2db 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -4842,14 +4842,19 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
4842 time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) { 4842 time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) {
4843 /* Cannot proceed until we've updated the superblock... */ 4843 /* Cannot proceed until we've updated the superblock... */
4844 wait_event(conf->wait_for_overlap, 4844 wait_event(conf->wait_for_overlap,
4845 atomic_read(&conf->reshape_stripes)==0); 4845 atomic_read(&conf->reshape_stripes)==0
4846 || test_bit(MD_RECOVERY_INTR, &mddev->recovery));
4847 if (atomic_read(&conf->reshape_stripes) != 0)
4848 return 0;
4846 mddev->reshape_position = conf->reshape_progress; 4849 mddev->reshape_position = conf->reshape_progress;
4847 mddev->curr_resync_completed = sector_nr; 4850 mddev->curr_resync_completed = sector_nr;
4848 conf->reshape_checkpoint = jiffies; 4851 conf->reshape_checkpoint = jiffies;
4849 set_bit(MD_CHANGE_DEVS, &mddev->flags); 4852 set_bit(MD_CHANGE_DEVS, &mddev->flags);
4850 md_wakeup_thread(mddev->thread); 4853 md_wakeup_thread(mddev->thread);
4851 wait_event(mddev->sb_wait, mddev->flags == 0 || 4854 wait_event(mddev->sb_wait, mddev->flags == 0 ||
4852 kthread_should_stop()); 4855 test_bit(MD_RECOVERY_INTR, &mddev->recovery));
4856 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
4857 return 0;
4853 spin_lock_irq(&conf->device_lock); 4858 spin_lock_irq(&conf->device_lock);
4854 conf->reshape_safe = mddev->reshape_position; 4859 conf->reshape_safe = mddev->reshape_position;
4855 spin_unlock_irq(&conf->device_lock); 4860 spin_unlock_irq(&conf->device_lock);
@@ -4932,7 +4937,10 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
4932 >= mddev->resync_max - mddev->curr_resync_completed) { 4937 >= mddev->resync_max - mddev->curr_resync_completed) {
4933 /* Cannot proceed until we've updated the superblock... */ 4938 /* Cannot proceed until we've updated the superblock... */
4934 wait_event(conf->wait_for_overlap, 4939 wait_event(conf->wait_for_overlap,
4935 atomic_read(&conf->reshape_stripes) == 0); 4940 atomic_read(&conf->reshape_stripes) == 0
4941 || test_bit(MD_RECOVERY_INTR, &mddev->recovery));
4942 if (atomic_read(&conf->reshape_stripes) != 0)
4943 goto ret;
4936 mddev->reshape_position = conf->reshape_progress; 4944 mddev->reshape_position = conf->reshape_progress;
4937 mddev->curr_resync_completed = sector_nr; 4945 mddev->curr_resync_completed = sector_nr;
4938 conf->reshape_checkpoint = jiffies; 4946 conf->reshape_checkpoint = jiffies;
@@ -4940,13 +4948,16 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
4940 md_wakeup_thread(mddev->thread); 4948 md_wakeup_thread(mddev->thread);
4941 wait_event(mddev->sb_wait, 4949 wait_event(mddev->sb_wait,
4942 !test_bit(MD_CHANGE_DEVS, &mddev->flags) 4950 !test_bit(MD_CHANGE_DEVS, &mddev->flags)
4943 || kthread_should_stop()); 4951 || test_bit(MD_RECOVERY_INTR, &mddev->recovery));
4952 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
4953 goto ret;
4944 spin_lock_irq(&conf->device_lock); 4954 spin_lock_irq(&conf->device_lock);
4945 conf->reshape_safe = mddev->reshape_position; 4955 conf->reshape_safe = mddev->reshape_position;
4946 spin_unlock_irq(&conf->device_lock); 4956 spin_unlock_irq(&conf->device_lock);
4947 wake_up(&conf->wait_for_overlap); 4957 wake_up(&conf->wait_for_overlap);
4948 sysfs_notify(&mddev->kobj, NULL, "sync_completed"); 4958 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
4949 } 4959 }
4960ret:
4950 return reshape_sectors; 4961 return reshape_sectors;
4951} 4962}
4952 4963