aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.de>2012-07-03 03:45:31 -0400
committerNeilBrown <neilb@suse.de>2012-07-03 03:45:31 -0400
commitb357f04a67c2aeee828b240863cd3f21d6cb3179 (patch)
treeb8495f2c04fc40d5a2885fe4f7ff8d627cd55031 /drivers/md
parentf456309106e9657645c81bce1a6bb3230393564e (diff)
md: fix up plugging (again).
The value returned by "mddev_check_plug" is only valid until the next 'schedule' as that will unplug things. This could happen at any call to mempool_alloc. So just calling mddev_check_plug at the start doesn't really make sense. So call it just before, or just after, queuing things for the thread. As the action that happens at unplug is to wake the thread, this makes lots of sense. If we cannot add a plug (which requires a small GFP_ATOMIC alloc) we wake thread immediately. RAID5 is a bit different. Requests are queued for the thread and the thread is woken by release_stripe. So we don't need to wake the thread on failure. However the thread doesn't perform certain actions when there is any active plug, so it is important to install a plug before waking the thread. So for RAID5 we install the plug *before* queuing the request and waking the thread. Without this patch it is possible for raid1 or raid10 to queue a request without then waking the thread, resulting in the array locking up. Also change raid10 to only flush_pending_write when there are not active plugs, just like raid1. This patch is suitable for 3.0 or later. I plan to submit it to -stable, but I'll like to let it spend a few weeks in mainline first to be sure it is completely safe. Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/raid1.c7
-rw-r--r--drivers/md/raid10.c12
-rw-r--r--drivers/md/raid5.c6
3 files changed, 9 insertions, 16 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 34b4665cb0b6..8c2754f835ef 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -883,7 +883,6 @@ static void make_request(struct mddev *mddev, struct bio * bio)
883 const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); 883 const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
884 const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA)); 884 const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA));
885 struct md_rdev *blocked_rdev; 885 struct md_rdev *blocked_rdev;
886 int plugged;
887 int first_clone; 886 int first_clone;
888 int sectors_handled; 887 int sectors_handled;
889 int max_sectors; 888 int max_sectors;
@@ -1034,7 +1033,6 @@ read_again:
1034 * the bad blocks. Each set of writes gets it's own r1bio 1033 * the bad blocks. Each set of writes gets it's own r1bio
1035 * with a set of bios attached. 1034 * with a set of bios attached.
1036 */ 1035 */
1037 plugged = mddev_check_plugged(mddev);
1038 1036
1039 disks = conf->raid_disks * 2; 1037 disks = conf->raid_disks * 2;
1040 retry_write: 1038 retry_write:
@@ -1191,6 +1189,8 @@ read_again:
1191 bio_list_add(&conf->pending_bio_list, mbio); 1189 bio_list_add(&conf->pending_bio_list, mbio);
1192 conf->pending_count++; 1190 conf->pending_count++;
1193 spin_unlock_irqrestore(&conf->device_lock, flags); 1191 spin_unlock_irqrestore(&conf->device_lock, flags);
1192 if (!mddev_check_plugged(mddev))
1193 md_wakeup_thread(mddev->thread);
1194 } 1194 }
1195 /* Mustn't call r1_bio_write_done before this next test, 1195 /* Mustn't call r1_bio_write_done before this next test,
1196 * as it could result in the bio being freed. 1196 * as it could result in the bio being freed.
@@ -1213,9 +1213,6 @@ read_again:
1213 1213
1214 /* In case raid1d snuck in to freeze_array */ 1214 /* In case raid1d snuck in to freeze_array */
1215 wake_up(&conf->wait_barrier); 1215 wake_up(&conf->wait_barrier);
1216
1217 if (do_sync || !bitmap || !plugged)
1218 md_wakeup_thread(mddev->thread);
1219} 1216}
1220 1217
1221static void status(struct seq_file *seq, struct mddev *mddev) 1218static void status(struct seq_file *seq, struct mddev *mddev)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index edc1088a1320..acf5a828c7e1 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1039,7 +1039,6 @@ static void make_request(struct mddev *mddev, struct bio * bio)
1039 const unsigned long do_fua = (bio->bi_rw & REQ_FUA); 1039 const unsigned long do_fua = (bio->bi_rw & REQ_FUA);
1040 unsigned long flags; 1040 unsigned long flags;
1041 struct md_rdev *blocked_rdev; 1041 struct md_rdev *blocked_rdev;
1042 int plugged;
1043 int sectors_handled; 1042 int sectors_handled;
1044 int max_sectors; 1043 int max_sectors;
1045 int sectors; 1044 int sectors;
@@ -1239,7 +1238,6 @@ read_again:
1239 * of r10_bios is recored in bio->bi_phys_segments just as with 1238 * of r10_bios is recored in bio->bi_phys_segments just as with
1240 * the read case. 1239 * the read case.
1241 */ 1240 */
1242 plugged = mddev_check_plugged(mddev);
1243 1241
1244 r10_bio->read_slot = -1; /* make sure repl_bio gets freed */ 1242 r10_bio->read_slot = -1; /* make sure repl_bio gets freed */
1245 raid10_find_phys(conf, r10_bio); 1243 raid10_find_phys(conf, r10_bio);
@@ -1396,6 +1394,8 @@ retry_write:
1396 bio_list_add(&conf->pending_bio_list, mbio); 1394 bio_list_add(&conf->pending_bio_list, mbio);
1397 conf->pending_count++; 1395 conf->pending_count++;
1398 spin_unlock_irqrestore(&conf->device_lock, flags); 1396 spin_unlock_irqrestore(&conf->device_lock, flags);
1397 if (!mddev_check_plugged(mddev, 0, 0))
1398 md_wakeup_thread(mddev->thread);
1399 1399
1400 if (!r10_bio->devs[i].repl_bio) 1400 if (!r10_bio->devs[i].repl_bio)
1401 continue; 1401 continue;
@@ -1423,6 +1423,8 @@ retry_write:
1423 bio_list_add(&conf->pending_bio_list, mbio); 1423 bio_list_add(&conf->pending_bio_list, mbio);
1424 conf->pending_count++; 1424 conf->pending_count++;
1425 spin_unlock_irqrestore(&conf->device_lock, flags); 1425 spin_unlock_irqrestore(&conf->device_lock, flags);
1426 if (!mddev_check_plugged(mddev))
1427 md_wakeup_thread(mddev->thread);
1426 } 1428 }
1427 1429
1428 /* Don't remove the bias on 'remaining' (one_write_done) until 1430 /* Don't remove the bias on 'remaining' (one_write_done) until
@@ -1448,9 +1450,6 @@ retry_write:
1448 1450
1449 /* In case raid10d snuck in to freeze_array */ 1451 /* In case raid10d snuck in to freeze_array */
1450 wake_up(&conf->wait_barrier); 1452 wake_up(&conf->wait_barrier);
1451
1452 if (do_sync || !mddev->bitmap || !plugged)
1453 md_wakeup_thread(mddev->thread);
1454} 1453}
1455 1454
1456static void status(struct seq_file *seq, struct mddev *mddev) 1455static void status(struct seq_file *seq, struct mddev *mddev)
@@ -2661,7 +2660,8 @@ static void raid10d(struct mddev *mddev)
2661 blk_start_plug(&plug); 2660 blk_start_plug(&plug);
2662 for (;;) { 2661 for (;;) {
2663 2662
2664 flush_pending_writes(conf); 2663 if (atomic_read(&mddev->plug_cnt) == 0)
2664 flush_pending_writes(conf);
2665 2665
2666 spin_lock_irqsave(&conf->device_lock, flags); 2666 spin_lock_irqsave(&conf->device_lock, flags);
2667 if (list_empty(head)) { 2667 if (list_empty(head)) {
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 7245a9df35a9..04348d76bb30 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3997,7 +3997,6 @@ static void make_request(struct mddev *mddev, struct bio * bi)
3997 struct stripe_head *sh; 3997 struct stripe_head *sh;
3998 const int rw = bio_data_dir(bi); 3998 const int rw = bio_data_dir(bi);
3999 int remaining; 3999 int remaining;
4000 int plugged;
4001 4000
4002 if (unlikely(bi->bi_rw & REQ_FLUSH)) { 4001 if (unlikely(bi->bi_rw & REQ_FLUSH)) {
4003 md_flush_request(mddev, bi); 4002 md_flush_request(mddev, bi);
@@ -4016,7 +4015,6 @@ static void make_request(struct mddev *mddev, struct bio * bi)
4016 bi->bi_next = NULL; 4015 bi->bi_next = NULL;
4017 bi->bi_phys_segments = 1; /* over-loaded to count active stripes */ 4016 bi->bi_phys_segments = 1; /* over-loaded to count active stripes */
4018 4017
4019 plugged = mddev_check_plugged(mddev);
4020 for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) { 4018 for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
4021 DEFINE_WAIT(w); 4019 DEFINE_WAIT(w);
4022 int previous; 4020 int previous;
@@ -4118,6 +4116,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
4118 if ((bi->bi_rw & REQ_SYNC) && 4116 if ((bi->bi_rw & REQ_SYNC) &&
4119 !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) 4117 !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
4120 atomic_inc(&conf->preread_active_stripes); 4118 atomic_inc(&conf->preread_active_stripes);
4119 mddev_check_plugged(mddev);
4121 release_stripe(sh); 4120 release_stripe(sh);
4122 } else { 4121 } else {
4123 /* cannot get stripe for read-ahead, just give-up */ 4122 /* cannot get stripe for read-ahead, just give-up */
@@ -4125,10 +4124,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
4125 finish_wait(&conf->wait_for_overlap, &w); 4124 finish_wait(&conf->wait_for_overlap, &w);
4126 break; 4125 break;
4127 } 4126 }
4128
4129 } 4127 }
4130 if (!plugged)
4131 md_wakeup_thread(mddev->thread);
4132 4128
4133 spin_lock_irq(&conf->device_lock); 4129 spin_lock_irq(&conf->device_lock);
4134 remaining = raid5_dec_bi_phys_segments(bi); 4130 remaining = raid5_dec_bi_phys_segments(bi);