aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-06-13 13:13:29 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2013-06-13 13:13:29 -0400
commit82ea4be61f45d1a7ba1f5151d90124d1ece0c07a (patch)
tree2d2e89a3074d8385478ca5c4b5918f3552d3c25e /drivers/md
parentb844db31874e3b1c3b86c65024ac7bed9f77ee42 (diff)
parent5026d7a9b2f3eb1f9bda66c18ac6bc3036ec9020 (diff)
Merge tag 'md-3.10-fixes' of git://neil.brown.name/md
Pull md bugfixes from Neil Brown:
"A few bugfixes for md. Some tagged for -stable."
* tag 'md-3.10-fixes' of git://neil.brown.name/md:
  - md/raid1,5,10: Disable WRITE SAME until a recovery strategy is in place
  - md/raid1,raid10: use freeze_array in place of raise_barrier in various places.
  - md/raid1: consider WRITE as successful only if at least one non-Faulty and non-rebuilding drive completed it.
  - md: md_stop_writes() should always freeze recovery.
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/md.c2
-rw-r--r--drivers/md/raid1.c38
-rw-r--r--drivers/md/raid10.c29
-rw-r--r--drivers/md/raid5.c4
4 files changed, 47 insertions, 26 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 681d1099a2d5..9b82377a833b 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5268,8 +5268,8 @@ static void md_clean(struct mddev *mddev)
5268 5268
5269static void __md_stop_writes(struct mddev *mddev) 5269static void __md_stop_writes(struct mddev *mddev)
5270{ 5270{
5271 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5271 if (mddev->sync_thread) { 5272 if (mddev->sync_thread) {
5272 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5273 set_bit(MD_RECOVERY_INTR, &mddev->recovery); 5273 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5274 md_reap_sync_thread(mddev); 5274 md_reap_sync_thread(mddev);
5275 } 5275 }
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 55951182af73..6e17f8181c4b 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -417,7 +417,17 @@ static void raid1_end_write_request(struct bio *bio, int error)
417 417
418 r1_bio->bios[mirror] = NULL; 418 r1_bio->bios[mirror] = NULL;
419 to_put = bio; 419 to_put = bio;
420 set_bit(R1BIO_Uptodate, &r1_bio->state); 420 /*
421 * Do not set R1BIO_Uptodate if the current device is
422 * rebuilding or Faulty. This is because we cannot use
423 * such device for properly reading the data back (we could
424 * potentially use it, if the current write would have fallen
425 * before rdev->recovery_offset), but for simplicity we don't
426 * check this here.
427 */
428 if (test_bit(In_sync, &conf->mirrors[mirror].rdev->flags) &&
429 !test_bit(Faulty, &conf->mirrors[mirror].rdev->flags))
430 set_bit(R1BIO_Uptodate, &r1_bio->state);
421 431
422 /* Maybe we can clear some bad blocks. */ 432 /* Maybe we can clear some bad blocks. */
423 if (is_badblock(conf->mirrors[mirror].rdev, 433 if (is_badblock(conf->mirrors[mirror].rdev,
@@ -870,17 +880,17 @@ static void allow_barrier(struct r1conf *conf)
870 wake_up(&conf->wait_barrier); 880 wake_up(&conf->wait_barrier);
871} 881}
872 882
873static void freeze_array(struct r1conf *conf) 883static void freeze_array(struct r1conf *conf, int extra)
874{ 884{
875 /* stop syncio and normal IO and wait for everything to 885 /* stop syncio and normal IO and wait for everything to
876 * go quiet. 886 * go quiet.
877 * We increment barrier and nr_waiting, and then 887 * We increment barrier and nr_waiting, and then
878 * wait until nr_pending match nr_queued+1 888 * wait until nr_pending match nr_queued+extra
879 * This is called in the context of one normal IO request 889 * This is called in the context of one normal IO request
880 * that has failed. Thus any sync request that might be pending 890 * that has failed. Thus any sync request that might be pending
881 * will be blocked by nr_pending, and we need to wait for 891 * will be blocked by nr_pending, and we need to wait for
882 * pending IO requests to complete or be queued for re-try. 892 * pending IO requests to complete or be queued for re-try.
883 * Thus the number queued (nr_queued) plus this request (1) 893 * Thus the number queued (nr_queued) plus this request (extra)
884 * must match the number of pending IOs (nr_pending) before 894 * must match the number of pending IOs (nr_pending) before
885 * we continue. 895 * we continue.
886 */ 896 */
@@ -888,7 +898,7 @@ static void freeze_array(struct r1conf *conf)
888 conf->barrier++; 898 conf->barrier++;
889 conf->nr_waiting++; 899 conf->nr_waiting++;
890 wait_event_lock_irq_cmd(conf->wait_barrier, 900 wait_event_lock_irq_cmd(conf->wait_barrier,
891 conf->nr_pending == conf->nr_queued+1, 901 conf->nr_pending == conf->nr_queued+extra,
892 conf->resync_lock, 902 conf->resync_lock,
893 flush_pending_writes(conf)); 903 flush_pending_writes(conf));
894 spin_unlock_irq(&conf->resync_lock); 904 spin_unlock_irq(&conf->resync_lock);
@@ -1544,8 +1554,8 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
1544 * we wait for all outstanding requests to complete. 1554 * we wait for all outstanding requests to complete.
1545 */ 1555 */
1546 synchronize_sched(); 1556 synchronize_sched();
1547 raise_barrier(conf); 1557 freeze_array(conf, 0);
1548 lower_barrier(conf); 1558 unfreeze_array(conf);
1549 clear_bit(Unmerged, &rdev->flags); 1559 clear_bit(Unmerged, &rdev->flags);
1550 } 1560 }
1551 md_integrity_add_rdev(rdev, mddev); 1561 md_integrity_add_rdev(rdev, mddev);
@@ -1595,11 +1605,11 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
1595 */ 1605 */
1596 struct md_rdev *repl = 1606 struct md_rdev *repl =
1597 conf->mirrors[conf->raid_disks + number].rdev; 1607 conf->mirrors[conf->raid_disks + number].rdev;
1598 raise_barrier(conf); 1608 freeze_array(conf, 0);
1599 clear_bit(Replacement, &repl->flags); 1609 clear_bit(Replacement, &repl->flags);
1600 p->rdev = repl; 1610 p->rdev = repl;
1601 conf->mirrors[conf->raid_disks + number].rdev = NULL; 1611 conf->mirrors[conf->raid_disks + number].rdev = NULL;
1602 lower_barrier(conf); 1612 unfreeze_array(conf);
1603 clear_bit(WantReplacement, &rdev->flags); 1613 clear_bit(WantReplacement, &rdev->flags);
1604 } else 1614 } else
1605 clear_bit(WantReplacement, &rdev->flags); 1615 clear_bit(WantReplacement, &rdev->flags);
@@ -2195,7 +2205,7 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
2195 * frozen 2205 * frozen
2196 */ 2206 */
2197 if (mddev->ro == 0) { 2207 if (mddev->ro == 0) {
2198 freeze_array(conf); 2208 freeze_array(conf, 1);
2199 fix_read_error(conf, r1_bio->read_disk, 2209 fix_read_error(conf, r1_bio->read_disk,
2200 r1_bio->sector, r1_bio->sectors); 2210 r1_bio->sector, r1_bio->sectors);
2201 unfreeze_array(conf); 2211 unfreeze_array(conf);
@@ -2780,8 +2790,8 @@ static int run(struct mddev *mddev)
2780 return PTR_ERR(conf); 2790 return PTR_ERR(conf);
2781 2791
2782 if (mddev->queue) 2792 if (mddev->queue)
2783 blk_queue_max_write_same_sectors(mddev->queue, 2793 blk_queue_max_write_same_sectors(mddev->queue, 0);
2784 mddev->chunk_sectors); 2794
2785 rdev_for_each(rdev, mddev) { 2795 rdev_for_each(rdev, mddev) {
2786 if (!mddev->gendisk) 2796 if (!mddev->gendisk)
2787 continue; 2797 continue;
@@ -2963,7 +2973,7 @@ static int raid1_reshape(struct mddev *mddev)
2963 return -ENOMEM; 2973 return -ENOMEM;
2964 } 2974 }
2965 2975
2966 raise_barrier(conf); 2976 freeze_array(conf, 0);
2967 2977
2968 /* ok, everything is stopped */ 2978 /* ok, everything is stopped */
2969 oldpool = conf->r1bio_pool; 2979 oldpool = conf->r1bio_pool;
@@ -2994,7 +3004,7 @@ static int raid1_reshape(struct mddev *mddev)
2994 conf->raid_disks = mddev->raid_disks = raid_disks; 3004 conf->raid_disks = mddev->raid_disks = raid_disks;
2995 mddev->delta_disks = 0; 3005 mddev->delta_disks = 0;
2996 3006
2997 lower_barrier(conf); 3007 unfreeze_array(conf);
2998 3008
2999 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 3009 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
3000 md_wakeup_thread(mddev->thread); 3010 md_wakeup_thread(mddev->thread);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 59d4daa5f4c7..6ddae2501b9a 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -490,7 +490,17 @@ static void raid10_end_write_request(struct bio *bio, int error)
490 sector_t first_bad; 490 sector_t first_bad;
491 int bad_sectors; 491 int bad_sectors;
492 492
493 set_bit(R10BIO_Uptodate, &r10_bio->state); 493 /*
494 * Do not set R10BIO_Uptodate if the current device is
495 * rebuilding or Faulty. This is because we cannot use
496 * such device for properly reading the data back (we could
497 * potentially use it, if the current write would have fallen
498 * before rdev->recovery_offset), but for simplicity we don't
499 * check this here.
500 */
501 if (test_bit(In_sync, &rdev->flags) &&
502 !test_bit(Faulty, &rdev->flags))
503 set_bit(R10BIO_Uptodate, &r10_bio->state);
494 504
495 /* Maybe we can clear some bad blocks. */ 505 /* Maybe we can clear some bad blocks. */
496 if (is_badblock(rdev, 506 if (is_badblock(rdev,
@@ -1055,17 +1065,17 @@ static void allow_barrier(struct r10conf *conf)
1055 wake_up(&conf->wait_barrier); 1065 wake_up(&conf->wait_barrier);
1056} 1066}
1057 1067
1058static void freeze_array(struct r10conf *conf) 1068static void freeze_array(struct r10conf *conf, int extra)
1059{ 1069{
1060 /* stop syncio and normal IO and wait for everything to 1070 /* stop syncio and normal IO and wait for everything to
1061 * go quiet. 1071 * go quiet.
1062 * We increment barrier and nr_waiting, and then 1072 * We increment barrier and nr_waiting, and then
1063 * wait until nr_pending match nr_queued+1 1073 * wait until nr_pending match nr_queued+extra
1064 * This is called in the context of one normal IO request 1074 * This is called in the context of one normal IO request
1065 * that has failed. Thus any sync request that might be pending 1075 * that has failed. Thus any sync request that might be pending
1066 * will be blocked by nr_pending, and we need to wait for 1076 * will be blocked by nr_pending, and we need to wait for
1067 * pending IO requests to complete or be queued for re-try. 1077 * pending IO requests to complete or be queued for re-try.
1068 * Thus the number queued (nr_queued) plus this request (1) 1078 * Thus the number queued (nr_queued) plus this request (extra)
1069 * must match the number of pending IOs (nr_pending) before 1079 * must match the number of pending IOs (nr_pending) before
1070 * we continue. 1080 * we continue.
1071 */ 1081 */
@@ -1073,7 +1083,7 @@ static void freeze_array(struct r10conf *conf)
1073 conf->barrier++; 1083 conf->barrier++;
1074 conf->nr_waiting++; 1084 conf->nr_waiting++;
1075 wait_event_lock_irq_cmd(conf->wait_barrier, 1085 wait_event_lock_irq_cmd(conf->wait_barrier,
1076 conf->nr_pending == conf->nr_queued+1, 1086 conf->nr_pending == conf->nr_queued+extra,
1077 conf->resync_lock, 1087 conf->resync_lock,
1078 flush_pending_writes(conf)); 1088 flush_pending_writes(conf));
1079 1089
@@ -1837,8 +1847,8 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
1837 * we wait for all outstanding requests to complete. 1847 * we wait for all outstanding requests to complete.
1838 */ 1848 */
1839 synchronize_sched(); 1849 synchronize_sched();
1840 raise_barrier(conf, 0); 1850 freeze_array(conf, 0);
1841 lower_barrier(conf); 1851 unfreeze_array(conf);
1842 clear_bit(Unmerged, &rdev->flags); 1852 clear_bit(Unmerged, &rdev->flags);
1843 } 1853 }
1844 md_integrity_add_rdev(rdev, mddev); 1854 md_integrity_add_rdev(rdev, mddev);
@@ -2612,7 +2622,7 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
2612 r10_bio->devs[slot].bio = NULL; 2622 r10_bio->devs[slot].bio = NULL;
2613 2623
2614 if (mddev->ro == 0) { 2624 if (mddev->ro == 0) {
2615 freeze_array(conf); 2625 freeze_array(conf, 1);
2616 fix_read_error(conf, mddev, r10_bio); 2626 fix_read_error(conf, mddev, r10_bio);
2617 unfreeze_array(conf); 2627 unfreeze_array(conf);
2618 } else 2628 } else
@@ -3609,8 +3619,7 @@ static int run(struct mddev *mddev)
3609 if (mddev->queue) { 3619 if (mddev->queue) {
3610 blk_queue_max_discard_sectors(mddev->queue, 3620 blk_queue_max_discard_sectors(mddev->queue,
3611 mddev->chunk_sectors); 3621 mddev->chunk_sectors);
3612 blk_queue_max_write_same_sectors(mddev->queue, 3622 blk_queue_max_write_same_sectors(mddev->queue, 0);
3613 mddev->chunk_sectors);
3614 blk_queue_io_min(mddev->queue, chunk_size); 3623 blk_queue_io_min(mddev->queue, chunk_size);
3615 if (conf->geo.raid_disks % conf->geo.near_copies) 3624 if (conf->geo.raid_disks % conf->geo.near_copies)
3616 blk_queue_io_opt(mddev->queue, chunk_size * conf->geo.raid_disks); 3625 blk_queue_io_opt(mddev->queue, chunk_size * conf->geo.raid_disks);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 753f318c8984..05e4a105b9c7 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -5466,7 +5466,7 @@ static int run(struct mddev *mddev)
5466 if (mddev->major_version == 0 && 5466 if (mddev->major_version == 0 &&
5467 mddev->minor_version > 90) 5467 mddev->minor_version > 90)
5468 rdev->recovery_offset = reshape_offset; 5468 rdev->recovery_offset = reshape_offset;
5469 5469
5470 if (rdev->recovery_offset < reshape_offset) { 5470 if (rdev->recovery_offset < reshape_offset) {
5471 /* We need to check old and new layout */ 5471 /* We need to check old and new layout */
5472 if (!only_parity(rdev->raid_disk, 5472 if (!only_parity(rdev->raid_disk,
@@ -5589,6 +5589,8 @@ static int run(struct mddev *mddev)
5589 */ 5589 */
5590 mddev->queue->limits.discard_zeroes_data = 0; 5590 mddev->queue->limits.discard_zeroes_data = 0;
5591 5591
5592 blk_queue_max_write_same_sectors(mddev->queue, 0);
5593
5592 rdev_for_each(rdev, mddev) { 5594 rdev_for_each(rdev, mddev) {
5593 disk_stack_limits(mddev->gendisk, rdev->bdev, 5595 disk_stack_limits(mddev->gendisk, rdev->bdev,
5594 rdev->data_offset << 9); 5596 rdev->data_offset << 9);