diff options
author | NeilBrown <neilb@suse.de> | 2012-07-03 03:45:31 -0400 |
---|---|---|
committer | NeilBrown <neilb@suse.de> | 2012-07-03 03:45:31 -0400 |
commit | b357f04a67c2aeee828b240863cd3f21d6cb3179 (patch) | |
tree | b8495f2c04fc40d5a2885fe4f7ff8d627cd55031 /drivers/md | |
parent | f456309106e9657645c81bce1a6bb3230393564e (diff) |
md: fix up plugging (again).
The value returned by "mddev_check_plug" is only valid until the
next 'schedule' as that will unplug things. This could happen at any
call to mempool_alloc.
So just calling mddev_check_plug at the start doesn't really make
sense.
So call it just before, or just after, queuing things for the thread.
As the action that happens at unplug is to wake the thread, this makes
lots of sense.
If we cannot add a plug (which requires a small GFP_ATOMIC alloc) we
wake thread immediately.
RAID5 is a bit different. Requests are queued for the thread and the
thread is woken by release_stripe. So we don't need to wake the
thread on failure.
However the thread doesn't perform certain actions when there is any
active plug, so it is important to install a plug before waking the
thread. So for RAID5 we install the plug *before* queuing the request
and waking the thread.
Without this patch it is possible for raid1 or raid10 to queue a
request without then waking the thread, resulting in the array locking
up.
Also change raid10 to only flush_pending_write when there are not
active plugs, just like raid1.
This patch is suitable for 3.0 or later. I plan to submit it to
-stable, but I'll like to let it spend a few weeks in mainline
first to be sure it is completely safe.
Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/raid1.c | 7 | ||||
-rw-r--r-- | drivers/md/raid10.c | 12 | ||||
-rw-r--r-- | drivers/md/raid5.c | 6 |
3 files changed, 9 insertions, 16 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 34b4665cb0b6..8c2754f835ef 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
@@ -883,7 +883,6 @@ static void make_request(struct mddev *mddev, struct bio * bio) | |||
883 | const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); | 883 | const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); |
884 | const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA)); | 884 | const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA)); |
885 | struct md_rdev *blocked_rdev; | 885 | struct md_rdev *blocked_rdev; |
886 | int plugged; | ||
887 | int first_clone; | 886 | int first_clone; |
888 | int sectors_handled; | 887 | int sectors_handled; |
889 | int max_sectors; | 888 | int max_sectors; |
@@ -1034,7 +1033,6 @@ read_again: | |||
1034 | * the bad blocks. Each set of writes gets it's own r1bio | 1033 | * the bad blocks. Each set of writes gets it's own r1bio |
1035 | * with a set of bios attached. | 1034 | * with a set of bios attached. |
1036 | */ | 1035 | */ |
1037 | plugged = mddev_check_plugged(mddev); | ||
1038 | 1036 | ||
1039 | disks = conf->raid_disks * 2; | 1037 | disks = conf->raid_disks * 2; |
1040 | retry_write: | 1038 | retry_write: |
@@ -1191,6 +1189,8 @@ read_again: | |||
1191 | bio_list_add(&conf->pending_bio_list, mbio); | 1189 | bio_list_add(&conf->pending_bio_list, mbio); |
1192 | conf->pending_count++; | 1190 | conf->pending_count++; |
1193 | spin_unlock_irqrestore(&conf->device_lock, flags); | 1191 | spin_unlock_irqrestore(&conf->device_lock, flags); |
1192 | if (!mddev_check_plugged(mddev)) | ||
1193 | md_wakeup_thread(mddev->thread); | ||
1194 | } | 1194 | } |
1195 | /* Mustn't call r1_bio_write_done before this next test, | 1195 | /* Mustn't call r1_bio_write_done before this next test, |
1196 | * as it could result in the bio being freed. | 1196 | * as it could result in the bio being freed. |
@@ -1213,9 +1213,6 @@ read_again: | |||
1213 | 1213 | ||
1214 | /* In case raid1d snuck in to freeze_array */ | 1214 | /* In case raid1d snuck in to freeze_array */ |
1215 | wake_up(&conf->wait_barrier); | 1215 | wake_up(&conf->wait_barrier); |
1216 | |||
1217 | if (do_sync || !bitmap || !plugged) | ||
1218 | md_wakeup_thread(mddev->thread); | ||
1219 | } | 1216 | } |
1220 | 1217 | ||
1221 | static void status(struct seq_file *seq, struct mddev *mddev) | 1218 | static void status(struct seq_file *seq, struct mddev *mddev) |
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index edc1088a1320..acf5a828c7e1 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -1039,7 +1039,6 @@ static void make_request(struct mddev *mddev, struct bio * bio) | |||
1039 | const unsigned long do_fua = (bio->bi_rw & REQ_FUA); | 1039 | const unsigned long do_fua = (bio->bi_rw & REQ_FUA); |
1040 | unsigned long flags; | 1040 | unsigned long flags; |
1041 | struct md_rdev *blocked_rdev; | 1041 | struct md_rdev *blocked_rdev; |
1042 | int plugged; | ||
1043 | int sectors_handled; | 1042 | int sectors_handled; |
1044 | int max_sectors; | 1043 | int max_sectors; |
1045 | int sectors; | 1044 | int sectors; |
@@ -1239,7 +1238,6 @@ read_again: | |||
1239 | * of r10_bios is recored in bio->bi_phys_segments just as with | 1238 | * of r10_bios is recored in bio->bi_phys_segments just as with |
1240 | * the read case. | 1239 | * the read case. |
1241 | */ | 1240 | */ |
1242 | plugged = mddev_check_plugged(mddev); | ||
1243 | 1241 | ||
1244 | r10_bio->read_slot = -1; /* make sure repl_bio gets freed */ | 1242 | r10_bio->read_slot = -1; /* make sure repl_bio gets freed */ |
1245 | raid10_find_phys(conf, r10_bio); | 1243 | raid10_find_phys(conf, r10_bio); |
@@ -1396,6 +1394,8 @@ retry_write: | |||
1396 | bio_list_add(&conf->pending_bio_list, mbio); | 1394 | bio_list_add(&conf->pending_bio_list, mbio); |
1397 | conf->pending_count++; | 1395 | conf->pending_count++; |
1398 | spin_unlock_irqrestore(&conf->device_lock, flags); | 1396 | spin_unlock_irqrestore(&conf->device_lock, flags); |
1397 | if (!mddev_check_plugged(mddev, 0, 0)) | ||
1398 | md_wakeup_thread(mddev->thread); | ||
1399 | 1399 | ||
1400 | if (!r10_bio->devs[i].repl_bio) | 1400 | if (!r10_bio->devs[i].repl_bio) |
1401 | continue; | 1401 | continue; |
@@ -1423,6 +1423,8 @@ retry_write: | |||
1423 | bio_list_add(&conf->pending_bio_list, mbio); | 1423 | bio_list_add(&conf->pending_bio_list, mbio); |
1424 | conf->pending_count++; | 1424 | conf->pending_count++; |
1425 | spin_unlock_irqrestore(&conf->device_lock, flags); | 1425 | spin_unlock_irqrestore(&conf->device_lock, flags); |
1426 | if (!mddev_check_plugged(mddev)) | ||
1427 | md_wakeup_thread(mddev->thread); | ||
1426 | } | 1428 | } |
1427 | 1429 | ||
1428 | /* Don't remove the bias on 'remaining' (one_write_done) until | 1430 | /* Don't remove the bias on 'remaining' (one_write_done) until |
@@ -1448,9 +1450,6 @@ retry_write: | |||
1448 | 1450 | ||
1449 | /* In case raid10d snuck in to freeze_array */ | 1451 | /* In case raid10d snuck in to freeze_array */ |
1450 | wake_up(&conf->wait_barrier); | 1452 | wake_up(&conf->wait_barrier); |
1451 | |||
1452 | if (do_sync || !mddev->bitmap || !plugged) | ||
1453 | md_wakeup_thread(mddev->thread); | ||
1454 | } | 1453 | } |
1455 | 1454 | ||
1456 | static void status(struct seq_file *seq, struct mddev *mddev) | 1455 | static void status(struct seq_file *seq, struct mddev *mddev) |
@@ -2661,7 +2660,8 @@ static void raid10d(struct mddev *mddev) | |||
2661 | blk_start_plug(&plug); | 2660 | blk_start_plug(&plug); |
2662 | for (;;) { | 2661 | for (;;) { |
2663 | 2662 | ||
2664 | flush_pending_writes(conf); | 2663 | if (atomic_read(&mddev->plug_cnt) == 0) |
2664 | flush_pending_writes(conf); | ||
2665 | 2665 | ||
2666 | spin_lock_irqsave(&conf->device_lock, flags); | 2666 | spin_lock_irqsave(&conf->device_lock, flags); |
2667 | if (list_empty(head)) { | 2667 | if (list_empty(head)) { |
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 7245a9df35a9..04348d76bb30 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -3997,7 +3997,6 @@ static void make_request(struct mddev *mddev, struct bio * bi) | |||
3997 | struct stripe_head *sh; | 3997 | struct stripe_head *sh; |
3998 | const int rw = bio_data_dir(bi); | 3998 | const int rw = bio_data_dir(bi); |
3999 | int remaining; | 3999 | int remaining; |
4000 | int plugged; | ||
4001 | 4000 | ||
4002 | if (unlikely(bi->bi_rw & REQ_FLUSH)) { | 4001 | if (unlikely(bi->bi_rw & REQ_FLUSH)) { |
4003 | md_flush_request(mddev, bi); | 4002 | md_flush_request(mddev, bi); |
@@ -4016,7 +4015,6 @@ static void make_request(struct mddev *mddev, struct bio * bi) | |||
4016 | bi->bi_next = NULL; | 4015 | bi->bi_next = NULL; |
4017 | bi->bi_phys_segments = 1; /* over-loaded to count active stripes */ | 4016 | bi->bi_phys_segments = 1; /* over-loaded to count active stripes */ |
4018 | 4017 | ||
4019 | plugged = mddev_check_plugged(mddev); | ||
4020 | for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) { | 4018 | for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) { |
4021 | DEFINE_WAIT(w); | 4019 | DEFINE_WAIT(w); |
4022 | int previous; | 4020 | int previous; |
@@ -4118,6 +4116,7 @@ static void make_request(struct mddev *mddev, struct bio * bi) | |||
4118 | if ((bi->bi_rw & REQ_SYNC) && | 4116 | if ((bi->bi_rw & REQ_SYNC) && |
4119 | !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) | 4117 | !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) |
4120 | atomic_inc(&conf->preread_active_stripes); | 4118 | atomic_inc(&conf->preread_active_stripes); |
4119 | mddev_check_plugged(mddev); | ||
4121 | release_stripe(sh); | 4120 | release_stripe(sh); |
4122 | } else { | 4121 | } else { |
4123 | /* cannot get stripe for read-ahead, just give-up */ | 4122 | /* cannot get stripe for read-ahead, just give-up */ |
@@ -4125,10 +4124,7 @@ static void make_request(struct mddev *mddev, struct bio * bi) | |||
4125 | finish_wait(&conf->wait_for_overlap, &w); | 4124 | finish_wait(&conf->wait_for_overlap, &w); |
4126 | break; | 4125 | break; |
4127 | } | 4126 | } |
4128 | |||
4129 | } | 4127 | } |
4130 | if (!plugged) | ||
4131 | md_wakeup_thread(mddev->thread); | ||
4132 | 4128 | ||
4133 | spin_lock_irq(&conf->device_lock); | 4129 | spin_lock_irq(&conf->device_lock); |
4134 | remaining = raid5_dec_bi_phys_segments(bi); | 4130 | remaining = raid5_dec_bi_phys_segments(bi); |