aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
author NeilBrown <neilb@suse.de> 2014-09-29 18:10:42 -0400
committer NeilBrown <neilb@suse.de> 2014-10-13 22:08:28 -0400
commit ac05f256691fe427a3e84c19261adb0b67dd73c0 (patch)
tree 2f7254e7117eac0fc0d974029e5bb5422866a73c /drivers/md
parent 8b1afc3d6751063d3f0cdefe55719b1cd2f7edcc (diff)
md: don't start resync thread directly from md thread.
The main 'md' thread is needed for processing writes, so if it blocks, write requests could be delayed.

Starting a new thread requires some GFP_KERNEL allocations and so can wait for writes to complete. This can deadlock.

So instead, ask a workqueue to start the sync thread. There is no particular rush for this to happen, so any work queue will do.

MD_RECOVERY_RUNNING is used to ensure only one thread is started.

Reported-by: BillStuff <billstuff2001@sbcglobal.net>
Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers/md')
-rw-r--r-- drivers/md/md.c 59
1 files changed, 36 insertions, 23 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index a7e9fae6c639..19171c58d790 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -7767,6 +7767,33 @@ no_add:
7767 return spares; 7767 return spares;
7768} 7768}
7769 7769
7770static void md_start_sync(struct work_struct *ws)
7771{
7772 struct mddev *mddev = container_of(ws, struct mddev, del_work);
7773
7774 mddev->sync_thread = md_register_thread(md_do_sync,
7775 mddev,
7776 "resync");
7777 if (!mddev->sync_thread) {
7778 printk(KERN_ERR "%s: could not start resync"
7779 " thread...\n",
7780 mdname(mddev));
7781 /* leave the spares where they are, it shouldn't hurt */
7782 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
7783 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
7784 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
7785 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
7786 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
7787 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
7788 &mddev->recovery))
7789 if (mddev->sysfs_action)
7790 sysfs_notify_dirent_safe(mddev->sysfs_action);
7791 } else
7792 md_wakeup_thread(mddev->sync_thread);
7793 sysfs_notify_dirent_safe(mddev->sysfs_action);
7794 md_new_event(mddev);
7795}
7796
7770/* 7797/*
7771 * This routine is regularly called by all per-raid-array threads to 7798 * This routine is regularly called by all per-raid-array threads to
7772 * deal with generic issues like resync and super-block update. 7799 * deal with generic issues like resync and super-block update.
@@ -7883,7 +7910,7 @@ void md_check_recovery(struct mddev *mddev)
7883 7910
7884 if (!test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) || 7911 if (!test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
7885 test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) 7912 test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
7886 goto unlock; 7913 goto not_running;
7887 /* no recovery is running. 7914 /* no recovery is running.
7888 * remove any failed drives, then 7915 * remove any failed drives, then
7889 * add spares if possible. 7916 * add spares if possible.
@@ -7895,7 +7922,7 @@ void md_check_recovery(struct mddev *mddev)
7895 if (mddev->pers->check_reshape == NULL || 7922 if (mddev->pers->check_reshape == NULL ||
7896 mddev->pers->check_reshape(mddev) != 0) 7923 mddev->pers->check_reshape(mddev) != 0)
7897 /* Cannot proceed */ 7924 /* Cannot proceed */
7898 goto unlock; 7925 goto not_running;
7899 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); 7926 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
7900 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery); 7927 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
7901 } else if ((spares = remove_and_add_spares(mddev, NULL))) { 7928 } else if ((spares = remove_and_add_spares(mddev, NULL))) {
@@ -7908,7 +7935,7 @@ void md_check_recovery(struct mddev *mddev)
7908 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery); 7935 clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
7909 } else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) 7936 } else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
7910 /* nothing to be done ... */ 7937 /* nothing to be done ... */
7911 goto unlock; 7938 goto not_running;
7912 7939
7913 if (mddev->pers->sync_request) { 7940 if (mddev->pers->sync_request) {
7914 if (spares) { 7941 if (spares) {
@@ -7918,27 +7945,11 @@ void md_check_recovery(struct mddev *mddev)
7918 */ 7945 */
7919 bitmap_write_all(mddev->bitmap); 7946 bitmap_write_all(mddev->bitmap);
7920 } 7947 }
7921 mddev->sync_thread = md_register_thread(md_do_sync, 7948 INIT_WORK(&mddev->del_work, md_start_sync);
7922 mddev, 7949 queue_work(md_misc_wq, &mddev->del_work);
7923 "resync"); 7950 goto unlock;
7924 if (!mddev->sync_thread) {
7925 printk(KERN_ERR "%s: could not start resync"
7926 " thread...\n",
7927 mdname(mddev));
7928 /* leave the spares where they are, it shouldn't hurt */
7929 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
7930 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
7931 clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
7932 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
7933 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
7934 } else
7935 md_wakeup_thread(mddev->sync_thread);
7936 sysfs_notify_dirent_safe(mddev->sysfs_action);
7937 md_new_event(mddev);
7938 } 7951 }
7939 unlock: 7952 not_running:
7940 wake_up(&mddev->sb_wait);
7941
7942 if (!mddev->sync_thread) { 7953 if (!mddev->sync_thread) {
7943 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery); 7954 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
7944 if (test_and_clear_bit(MD_RECOVERY_RECOVER, 7955 if (test_and_clear_bit(MD_RECOVERY_RECOVER,
@@ -7946,6 +7957,8 @@ void md_check_recovery(struct mddev *mddev)
7946 if (mddev->sysfs_action) 7957 if (mddev->sysfs_action)
7947 sysfs_notify_dirent_safe(mddev->sysfs_action); 7958 sysfs_notify_dirent_safe(mddev->sysfs_action);
7948 } 7959 }
7960 unlock:
7961 wake_up(&mddev->sb_wait);
7949 mddev_unlock(mddev); 7962 mddev_unlock(mddev);
7950 } 7963 }
7951} 7964}