diff options
author | NeilBrown <neilb@suse.de> | 2014-09-29 18:10:42 -0400 |
---|---|---|
committer | NeilBrown <neilb@suse.de> | 2014-10-13 22:08:28 -0400 |
commit | ac05f256691fe427a3e84c19261adb0b67dd73c0 (patch) | |
tree | 2f7254e7117eac0fc0d974029e5bb5422866a73c /drivers/md | |
parent | 8b1afc3d6751063d3f0cdefe55719b1cd2f7edcc (diff) |
md: don't start resync thread directly from md thread.
The main 'md' thread is needed for processing writes, so if it blocks,
write requests could be delayed.
Starting a new thread requires some GFP_KERNEL allocations, which can
wait for writes to complete. If those writes in turn need the md thread, this can deadlock.
So instead, ask a workqueue to start the sync thread.
There is no particular rush for this to happen, so any workqueue
will do.
MD_RECOVERY_RUNNING is used to ensure only one thread is started.
Reported-by: BillStuff <billstuff2001@sbcglobal.net>
Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/md.c | 59 |
1 files changed, 36 insertions, 23 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index a7e9fae6c639..19171c58d790 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -7767,6 +7767,33 @@ no_add: | |||
7767 | return spares; | 7767 | return spares; |
7768 | } | 7768 | } |
7769 | 7769 | ||
7770 | static void md_start_sync(struct work_struct *ws) | ||
7771 | { | ||
7772 | struct mddev *mddev = container_of(ws, struct mddev, del_work); | ||
7773 | |||
7774 | mddev->sync_thread = md_register_thread(md_do_sync, | ||
7775 | mddev, | ||
7776 | "resync"); | ||
7777 | if (!mddev->sync_thread) { | ||
7778 | printk(KERN_ERR "%s: could not start resync" | ||
7779 | " thread...\n", | ||
7780 | mdname(mddev)); | ||
7781 | /* leave the spares where they are, it shouldn't hurt */ | ||
7782 | clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); | ||
7783 | clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); | ||
7784 | clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); | ||
7785 | clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); | ||
7786 | clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery); | ||
7787 | if (test_and_clear_bit(MD_RECOVERY_RECOVER, | ||
7788 | &mddev->recovery)) | ||
7789 | if (mddev->sysfs_action) | ||
7790 | sysfs_notify_dirent_safe(mddev->sysfs_action); | ||
7791 | } else | ||
7792 | md_wakeup_thread(mddev->sync_thread); | ||
7793 | sysfs_notify_dirent_safe(mddev->sysfs_action); | ||
7794 | md_new_event(mddev); | ||
7795 | } | ||
7796 | |||
7770 | /* | 7797 | /* |
7771 | * This routine is regularly called by all per-raid-array threads to | 7798 | * This routine is regularly called by all per-raid-array threads to |
7772 | * deal with generic issues like resync and super-block update. | 7799 | * deal with generic issues like resync and super-block update. |
@@ -7883,7 +7910,7 @@ void md_check_recovery(struct mddev *mddev) | |||
7883 | 7910 | ||
7884 | if (!test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) || | 7911 | if (!test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) || |
7885 | test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) | 7912 | test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) |
7886 | goto unlock; | 7913 | goto not_running; |
7887 | /* no recovery is running. | 7914 | /* no recovery is running. |
7888 | * remove any failed drives, then | 7915 | * remove any failed drives, then |
7889 | * add spares if possible. | 7916 | * add spares if possible. |
@@ -7895,7 +7922,7 @@ void md_check_recovery(struct mddev *mddev) | |||
7895 | if (mddev->pers->check_reshape == NULL || | 7922 | if (mddev->pers->check_reshape == NULL || |
7896 | mddev->pers->check_reshape(mddev) != 0) | 7923 | mddev->pers->check_reshape(mddev) != 0) |
7897 | /* Cannot proceed */ | 7924 | /* Cannot proceed */ |
7898 | goto unlock; | 7925 | goto not_running; |
7899 | set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); | 7926 | set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); |
7900 | clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery); | 7927 | clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery); |
7901 | } else if ((spares = remove_and_add_spares(mddev, NULL))) { | 7928 | } else if ((spares = remove_and_add_spares(mddev, NULL))) { |
@@ -7908,7 +7935,7 @@ void md_check_recovery(struct mddev *mddev) | |||
7908 | clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery); | 7935 | clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery); |
7909 | } else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) | 7936 | } else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) |
7910 | /* nothing to be done ... */ | 7937 | /* nothing to be done ... */ |
7911 | goto unlock; | 7938 | goto not_running; |
7912 | 7939 | ||
7913 | if (mddev->pers->sync_request) { | 7940 | if (mddev->pers->sync_request) { |
7914 | if (spares) { | 7941 | if (spares) { |
@@ -7918,27 +7945,11 @@ void md_check_recovery(struct mddev *mddev) | |||
7918 | */ | 7945 | */ |
7919 | bitmap_write_all(mddev->bitmap); | 7946 | bitmap_write_all(mddev->bitmap); |
7920 | } | 7947 | } |
7921 | mddev->sync_thread = md_register_thread(md_do_sync, | 7948 | INIT_WORK(&mddev->del_work, md_start_sync); |
7922 | mddev, | 7949 | queue_work(md_misc_wq, &mddev->del_work); |
7923 | "resync"); | 7950 | goto unlock; |
7924 | if (!mddev->sync_thread) { | ||
7925 | printk(KERN_ERR "%s: could not start resync" | ||
7926 | " thread...\n", | ||
7927 | mdname(mddev)); | ||
7928 | /* leave the spares where they are, it shouldn't hurt */ | ||
7929 | clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery); | ||
7930 | clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); | ||
7931 | clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); | ||
7932 | clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); | ||
7933 | clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); | ||
7934 | } else | ||
7935 | md_wakeup_thread(mddev->sync_thread); | ||
7936 | sysfs_notify_dirent_safe(mddev->sysfs_action); | ||
7937 | md_new_event(mddev); | ||
7938 | } | 7951 | } |
7939 | unlock: | 7952 | not_running: |
7940 | wake_up(&mddev->sb_wait); | ||
7941 | |||
7942 | if (!mddev->sync_thread) { | 7953 | if (!mddev->sync_thread) { |
7943 | clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery); | 7954 | clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery); |
7944 | if (test_and_clear_bit(MD_RECOVERY_RECOVER, | 7955 | if (test_and_clear_bit(MD_RECOVERY_RECOVER, |
@@ -7946,6 +7957,8 @@ void md_check_recovery(struct mddev *mddev) | |||
7946 | if (mddev->sysfs_action) | 7957 | if (mddev->sysfs_action) |
7947 | sysfs_notify_dirent_safe(mddev->sysfs_action); | 7958 | sysfs_notify_dirent_safe(mddev->sysfs_action); |
7948 | } | 7959 | } |
7960 | unlock: | ||
7961 | wake_up(&mddev->sb_wait); | ||
7949 | mddev_unlock(mddev); | 7962 | mddev_unlock(mddev); |
7950 | } | 7963 | } |
7951 | } | 7964 | } |