diff options
author | NeilBrown <neilb@suse.de> | 2013-11-18 20:02:01 -0500 |
---|---|---|
committer | NeilBrown <neilb@suse.de> | 2013-11-18 23:19:17 -0500 |
commit | c91abf5a3546a4ff0838d2905f4d7eae2795f724 (patch) | |
tree | 39b3735cebfa73ff370f2738bd8578a113fe4215 | |
parent | 29f097c4d968021ee4fad1b033be5825ff78330e (diff) |
md: use MD_RECOVERY_INTR instead of kthread_should_stop in resync thread.
We currently use kthread_should_stop() in various places in the
sync/reshape code to abort early.
However some places set MD_RECOVERY_INTR but don't immediately call
md_reap_sync_thread() (and we will shortly get another one).
When this happens we are relying on md_check_recovery() to reap the
thread and that only happens when it finishes normally.
So MD_RECOVERY_INTR must lead to a normal finish without the
kthread_should_stop() test.
So replace all relevant tests, and be more careful when the thread is
interrupted not to acknowledge that latest step in a reshape as it may
not be fully committed yet.
Also add a test on MD_RECOVERY_INTR in the 'is_mddev_idle' loop
so we don't have to wait for the speed to drop before we can abort.
Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r-- | drivers/md/md.c | 40 | ||||
-rw-r--r-- | drivers/md/raid10.c | 6 | ||||
-rw-r--r-- | drivers/md/raid5.c | 19 |
3 files changed, 34 insertions, 31 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index 1ca47b0f4779..a74045df7bab 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -7410,9 +7410,6 @@ void md_do_sync(struct md_thread *thread) | |||
7410 | mddev->curr_resync = 2; | 7410 | mddev->curr_resync = 2; |
7411 | 7411 | ||
7412 | try_again: | 7412 | try_again: |
7413 | if (kthread_should_stop()) | ||
7414 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); | ||
7415 | |||
7416 | if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) | 7413 | if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) |
7417 | goto skip; | 7414 | goto skip; |
7418 | for_each_mddev(mddev2, tmp) { | 7415 | for_each_mddev(mddev2, tmp) { |
@@ -7437,7 +7434,7 @@ void md_do_sync(struct md_thread *thread) | |||
7437 | * be caught by 'softlockup' | 7434 | * be caught by 'softlockup' |
7438 | */ | 7435 | */ |
7439 | prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE); | 7436 | prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE); |
7440 | if (!kthread_should_stop() && | 7437 | if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) && |
7441 | mddev2->curr_resync >= mddev->curr_resync) { | 7438 | mddev2->curr_resync >= mddev->curr_resync) { |
7442 | printk(KERN_INFO "md: delaying %s of %s" | 7439 | printk(KERN_INFO "md: delaying %s of %s" |
7443 | " until %s has finished (they" | 7440 | " until %s has finished (they" |
@@ -7513,7 +7510,7 @@ void md_do_sync(struct md_thread *thread) | |||
7513 | last_check = 0; | 7510 | last_check = 0; |
7514 | 7511 | ||
7515 | if (j>2) { | 7512 | if (j>2) { |
7516 | printk(KERN_INFO | 7513 | printk(KERN_INFO |
7517 | "md: resuming %s of %s from checkpoint.\n", | 7514 | "md: resuming %s of %s from checkpoint.\n", |
7518 | desc, mdname(mddev)); | 7515 | desc, mdname(mddev)); |
7519 | mddev->curr_resync = j; | 7516 | mddev->curr_resync = j; |
@@ -7550,7 +7547,8 @@ void md_do_sync(struct md_thread *thread) | |||
7550 | sysfs_notify(&mddev->kobj, NULL, "sync_completed"); | 7547 | sysfs_notify(&mddev->kobj, NULL, "sync_completed"); |
7551 | } | 7548 | } |
7552 | 7549 | ||
7553 | while (j >= mddev->resync_max && !kthread_should_stop()) { | 7550 | while (j >= mddev->resync_max && |
7551 | !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { | ||
7554 | /* As this condition is controlled by user-space, | 7552 | /* As this condition is controlled by user-space, |
7555 | * we can block indefinitely, so use '_interruptible' | 7553 | * we can block indefinitely, so use '_interruptible' |
7556 | * to avoid triggering warnings. | 7554 | * to avoid triggering warnings. |
@@ -7558,17 +7556,18 @@ void md_do_sync(struct md_thread *thread) | |||
7558 | flush_signals(current); /* just in case */ | 7556 | flush_signals(current); /* just in case */ |
7559 | wait_event_interruptible(mddev->recovery_wait, | 7557 | wait_event_interruptible(mddev->recovery_wait, |
7560 | mddev->resync_max > j | 7558 | mddev->resync_max > j |
7561 | || kthread_should_stop()); | 7559 | || test_bit(MD_RECOVERY_INTR, |
7560 | &mddev->recovery)); | ||
7562 | } | 7561 | } |
7563 | 7562 | ||
7564 | if (kthread_should_stop()) | 7563 | if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) |
7565 | goto interrupted; | 7564 | break; |
7566 | 7565 | ||
7567 | sectors = mddev->pers->sync_request(mddev, j, &skipped, | 7566 | sectors = mddev->pers->sync_request(mddev, j, &skipped, |
7568 | currspeed < speed_min(mddev)); | 7567 | currspeed < speed_min(mddev)); |
7569 | if (sectors == 0) { | 7568 | if (sectors == 0) { |
7570 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); | 7569 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); |
7571 | goto out; | 7570 | break; |
7572 | } | 7571 | } |
7573 | 7572 | ||
7574 | if (!skipped) { /* actual IO requested */ | 7573 | if (!skipped) { /* actual IO requested */ |
@@ -7605,10 +7604,8 @@ void md_do_sync(struct md_thread *thread) | |||
7605 | last_mark = next; | 7604 | last_mark = next; |
7606 | } | 7605 | } |
7607 | 7606 | ||
7608 | 7607 | if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) | |
7609 | if (kthread_should_stop()) | 7608 | break; |
7610 | goto interrupted; | ||
7611 | |||
7612 | 7609 | ||
7613 | /* | 7610 | /* |
7614 | * this loop exits only if either when we are slower than | 7611 | * this loop exits only if either when we are slower than |
@@ -7631,11 +7628,12 @@ void md_do_sync(struct md_thread *thread) | |||
7631 | } | 7628 | } |
7632 | } | 7629 | } |
7633 | } | 7630 | } |
7634 | printk(KERN_INFO "md: %s: %s done.\n",mdname(mddev), desc); | 7631 | printk(KERN_INFO "md: %s: %s %s.\n",mdname(mddev), desc, |
7632 | test_bit(MD_RECOVERY_INTR, &mddev->recovery) | ||
7633 | ? "interrupted" : "done"); | ||
7635 | /* | 7634 | /* |
7636 | * this also signals 'finished resyncing' to md_stop | 7635 | * this also signals 'finished resyncing' to md_stop |
7637 | */ | 7636 | */ |
7638 | out: | ||
7639 | blk_finish_plug(&plug); | 7637 | blk_finish_plug(&plug); |
7640 | wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active)); | 7638 | wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active)); |
7641 | 7639 | ||
@@ -7689,16 +7687,6 @@ void md_do_sync(struct md_thread *thread) | |||
7689 | set_bit(MD_RECOVERY_DONE, &mddev->recovery); | 7687 | set_bit(MD_RECOVERY_DONE, &mddev->recovery); |
7690 | md_wakeup_thread(mddev->thread); | 7688 | md_wakeup_thread(mddev->thread); |
7691 | return; | 7689 | return; |
7692 | |||
7693 | interrupted: | ||
7694 | /* | ||
7695 | * got a signal, exit. | ||
7696 | */ | ||
7697 | printk(KERN_INFO | ||
7698 | "md: md_do_sync() got signal ... exiting\n"); | ||
7699 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); | ||
7700 | goto out; | ||
7701 | |||
7702 | } | 7690 | } |
7703 | EXPORT_SYMBOL_GPL(md_do_sync); | 7691 | EXPORT_SYMBOL_GPL(md_do_sync); |
7704 | 7692 | ||
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 73dc8a377522..4f87ba5f3a66 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -4386,7 +4386,11 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, | |||
4386 | set_bit(MD_CHANGE_DEVS, &mddev->flags); | 4386 | set_bit(MD_CHANGE_DEVS, &mddev->flags); |
4387 | md_wakeup_thread(mddev->thread); | 4387 | md_wakeup_thread(mddev->thread); |
4388 | wait_event(mddev->sb_wait, mddev->flags == 0 || | 4388 | wait_event(mddev->sb_wait, mddev->flags == 0 || |
4389 | kthread_should_stop()); | 4389 | test_bit(MD_RECOVERY_INTR, &mddev->recovery)); |
4390 | if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { | ||
4391 | allow_barrier(conf); | ||
4392 | return sectors_done; | ||
4393 | } | ||
4390 | conf->reshape_safe = mddev->reshape_position; | 4394 | conf->reshape_safe = mddev->reshape_position; |
4391 | allow_barrier(conf); | 4395 | allow_barrier(conf); |
4392 | } | 4396 | } |
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 2c8907706109..02f6bc2ac2db 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -4842,14 +4842,19 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk | |||
4842 | time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) { | 4842 | time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) { |
4843 | /* Cannot proceed until we've updated the superblock... */ | 4843 | /* Cannot proceed until we've updated the superblock... */ |
4844 | wait_event(conf->wait_for_overlap, | 4844 | wait_event(conf->wait_for_overlap, |
4845 | atomic_read(&conf->reshape_stripes)==0); | 4845 | atomic_read(&conf->reshape_stripes)==0 |
4846 | || test_bit(MD_RECOVERY_INTR, &mddev->recovery)); | ||
4847 | if (atomic_read(&conf->reshape_stripes) != 0) | ||
4848 | return 0; | ||
4846 | mddev->reshape_position = conf->reshape_progress; | 4849 | mddev->reshape_position = conf->reshape_progress; |
4847 | mddev->curr_resync_completed = sector_nr; | 4850 | mddev->curr_resync_completed = sector_nr; |
4848 | conf->reshape_checkpoint = jiffies; | 4851 | conf->reshape_checkpoint = jiffies; |
4849 | set_bit(MD_CHANGE_DEVS, &mddev->flags); | 4852 | set_bit(MD_CHANGE_DEVS, &mddev->flags); |
4850 | md_wakeup_thread(mddev->thread); | 4853 | md_wakeup_thread(mddev->thread); |
4851 | wait_event(mddev->sb_wait, mddev->flags == 0 || | 4854 | wait_event(mddev->sb_wait, mddev->flags == 0 || |
4852 | kthread_should_stop()); | 4855 | test_bit(MD_RECOVERY_INTR, &mddev->recovery)); |
4856 | if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) | ||
4857 | return 0; | ||
4853 | spin_lock_irq(&conf->device_lock); | 4858 | spin_lock_irq(&conf->device_lock); |
4854 | conf->reshape_safe = mddev->reshape_position; | 4859 | conf->reshape_safe = mddev->reshape_position; |
4855 | spin_unlock_irq(&conf->device_lock); | 4860 | spin_unlock_irq(&conf->device_lock); |
@@ -4932,7 +4937,10 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk | |||
4932 | >= mddev->resync_max - mddev->curr_resync_completed) { | 4937 | >= mddev->resync_max - mddev->curr_resync_completed) { |
4933 | /* Cannot proceed until we've updated the superblock... */ | 4938 | /* Cannot proceed until we've updated the superblock... */ |
4934 | wait_event(conf->wait_for_overlap, | 4939 | wait_event(conf->wait_for_overlap, |
4935 | atomic_read(&conf->reshape_stripes) == 0); | 4940 | atomic_read(&conf->reshape_stripes) == 0 |
4941 | || test_bit(MD_RECOVERY_INTR, &mddev->recovery)); | ||
4942 | if (atomic_read(&conf->reshape_stripes) != 0) | ||
4943 | goto ret; | ||
4936 | mddev->reshape_position = conf->reshape_progress; | 4944 | mddev->reshape_position = conf->reshape_progress; |
4937 | mddev->curr_resync_completed = sector_nr; | 4945 | mddev->curr_resync_completed = sector_nr; |
4938 | conf->reshape_checkpoint = jiffies; | 4946 | conf->reshape_checkpoint = jiffies; |
@@ -4940,13 +4948,16 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk | |||
4940 | md_wakeup_thread(mddev->thread); | 4948 | md_wakeup_thread(mddev->thread); |
4941 | wait_event(mddev->sb_wait, | 4949 | wait_event(mddev->sb_wait, |
4942 | !test_bit(MD_CHANGE_DEVS, &mddev->flags) | 4950 | !test_bit(MD_CHANGE_DEVS, &mddev->flags) |
4943 | || kthread_should_stop()); | 4951 | || test_bit(MD_RECOVERY_INTR, &mddev->recovery)); |
4952 | if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) | ||
4953 | goto ret; | ||
4944 | spin_lock_irq(&conf->device_lock); | 4954 | spin_lock_irq(&conf->device_lock); |
4945 | conf->reshape_safe = mddev->reshape_position; | 4955 | conf->reshape_safe = mddev->reshape_position; |
4946 | spin_unlock_irq(&conf->device_lock); | 4956 | spin_unlock_irq(&conf->device_lock); |
4947 | wake_up(&conf->wait_for_overlap); | 4957 | wake_up(&conf->wait_for_overlap); |
4948 | sysfs_notify(&mddev->kobj, NULL, "sync_completed"); | 4958 | sysfs_notify(&mddev->kobj, NULL, "sync_completed"); |
4949 | } | 4959 | } |
4960 | ret: | ||
4950 | return reshape_sectors; | 4961 | return reshape_sectors; |
4951 | } | 4962 | } |
4952 | 4963 | ||