diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-06-13 13:13:29 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-06-13 13:13:29 -0400 |
commit | 82ea4be61f45d1a7ba1f5151d90124d1ece0c07a (patch) | |
tree | 2d2e89a3074d8385478ca5c4b5918f3552d3c25e /drivers/md | |
parent | b844db31874e3b1c3b86c65024ac7bed9f77ee42 (diff) | |
parent | 5026d7a9b2f3eb1f9bda66c18ac6bc3036ec9020 (diff) |
Merge tag 'md-3.10-fixes' of git://neil.brown.name/md
Pull md bugfixes from Neil Brown:
"A few bugfixes for md
Some tagged for -stable"
* tag 'md-3.10-fixes' of git://neil.brown.name/md:
md/raid1,5,10: Disable WRITE SAME until a recovery strategy is in place
md/raid1,raid10: use freeze_array in place of raise_barrier in various places.
md/raid1: consider WRITE as successful only if at least one non-Faulty and non-rebuilding drive completed it.
md: md_stop_writes() should always freeze recovery.
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/md.c | 2 | ||||
-rw-r--r-- | drivers/md/raid1.c | 38 | ||||
-rw-r--r-- | drivers/md/raid10.c | 29 | ||||
-rw-r--r-- | drivers/md/raid5.c | 4 |
4 files changed, 47 insertions, 26 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index 681d1099a2d5..9b82377a833b 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -5268,8 +5268,8 @@ static void md_clean(struct mddev *mddev) | |||
5268 | 5268 | ||
5269 | static void __md_stop_writes(struct mddev *mddev) | 5269 | static void __md_stop_writes(struct mddev *mddev) |
5270 | { | 5270 | { |
5271 | set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); | ||
5271 | if (mddev->sync_thread) { | 5272 | if (mddev->sync_thread) { |
5272 | set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); | ||
5273 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); | 5273 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); |
5274 | md_reap_sync_thread(mddev); | 5274 | md_reap_sync_thread(mddev); |
5275 | } | 5275 | } |
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 55951182af73..6e17f8181c4b 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
@@ -417,7 +417,17 @@ static void raid1_end_write_request(struct bio *bio, int error) | |||
417 | 417 | ||
418 | r1_bio->bios[mirror] = NULL; | 418 | r1_bio->bios[mirror] = NULL; |
419 | to_put = bio; | 419 | to_put = bio; |
420 | set_bit(R1BIO_Uptodate, &r1_bio->state); | 420 | /* |
421 | * Do not set R1BIO_Uptodate if the current device is | ||
422 | * rebuilding or Faulty. This is because we cannot use | ||
423 | * such device for properly reading the data back (we could | ||
424 | * potentially use it, if the current write would have felt | ||
425 | * before rdev->recovery_offset, but for simplicity we don't | ||
426 | * check this here. | ||
427 | */ | ||
428 | if (test_bit(In_sync, &conf->mirrors[mirror].rdev->flags) && | ||
429 | !test_bit(Faulty, &conf->mirrors[mirror].rdev->flags)) | ||
430 | set_bit(R1BIO_Uptodate, &r1_bio->state); | ||
421 | 431 | ||
422 | /* Maybe we can clear some bad blocks. */ | 432 | /* Maybe we can clear some bad blocks. */ |
423 | if (is_badblock(conf->mirrors[mirror].rdev, | 433 | if (is_badblock(conf->mirrors[mirror].rdev, |
@@ -870,17 +880,17 @@ static void allow_barrier(struct r1conf *conf) | |||
870 | wake_up(&conf->wait_barrier); | 880 | wake_up(&conf->wait_barrier); |
871 | } | 881 | } |
872 | 882 | ||
873 | static void freeze_array(struct r1conf *conf) | 883 | static void freeze_array(struct r1conf *conf, int extra) |
874 | { | 884 | { |
875 | /* stop syncio and normal IO and wait for everything to | 885 | /* stop syncio and normal IO and wait for everything to |
876 | * go quite. | 886 | * go quite. |
877 | * We increment barrier and nr_waiting, and then | 887 | * We increment barrier and nr_waiting, and then |
878 | * wait until nr_pending match nr_queued+1 | 888 | * wait until nr_pending match nr_queued+extra |
879 | * This is called in the context of one normal IO request | 889 | * This is called in the context of one normal IO request |
880 | * that has failed. Thus any sync request that might be pending | 890 | * that has failed. Thus any sync request that might be pending |
881 | * will be blocked by nr_pending, and we need to wait for | 891 | * will be blocked by nr_pending, and we need to wait for |
882 | * pending IO requests to complete or be queued for re-try. | 892 | * pending IO requests to complete or be queued for re-try. |
883 | * Thus the number queued (nr_queued) plus this request (1) | 893 | * Thus the number queued (nr_queued) plus this request (extra) |
884 | * must match the number of pending IOs (nr_pending) before | 894 | * must match the number of pending IOs (nr_pending) before |
885 | * we continue. | 895 | * we continue. |
886 | */ | 896 | */ |
@@ -888,7 +898,7 @@ static void freeze_array(struct r1conf *conf) | |||
888 | conf->barrier++; | 898 | conf->barrier++; |
889 | conf->nr_waiting++; | 899 | conf->nr_waiting++; |
890 | wait_event_lock_irq_cmd(conf->wait_barrier, | 900 | wait_event_lock_irq_cmd(conf->wait_barrier, |
891 | conf->nr_pending == conf->nr_queued+1, | 901 | conf->nr_pending == conf->nr_queued+extra, |
892 | conf->resync_lock, | 902 | conf->resync_lock, |
893 | flush_pending_writes(conf)); | 903 | flush_pending_writes(conf)); |
894 | spin_unlock_irq(&conf->resync_lock); | 904 | spin_unlock_irq(&conf->resync_lock); |
@@ -1544,8 +1554,8 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev) | |||
1544 | * we wait for all outstanding requests to complete. | 1554 | * we wait for all outstanding requests to complete. |
1545 | */ | 1555 | */ |
1546 | synchronize_sched(); | 1556 | synchronize_sched(); |
1547 | raise_barrier(conf); | 1557 | freeze_array(conf, 0); |
1548 | lower_barrier(conf); | 1558 | unfreeze_array(conf); |
1549 | clear_bit(Unmerged, &rdev->flags); | 1559 | clear_bit(Unmerged, &rdev->flags); |
1550 | } | 1560 | } |
1551 | md_integrity_add_rdev(rdev, mddev); | 1561 | md_integrity_add_rdev(rdev, mddev); |
@@ -1595,11 +1605,11 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev) | |||
1595 | */ | 1605 | */ |
1596 | struct md_rdev *repl = | 1606 | struct md_rdev *repl = |
1597 | conf->mirrors[conf->raid_disks + number].rdev; | 1607 | conf->mirrors[conf->raid_disks + number].rdev; |
1598 | raise_barrier(conf); | 1608 | freeze_array(conf, 0); |
1599 | clear_bit(Replacement, &repl->flags); | 1609 | clear_bit(Replacement, &repl->flags); |
1600 | p->rdev = repl; | 1610 | p->rdev = repl; |
1601 | conf->mirrors[conf->raid_disks + number].rdev = NULL; | 1611 | conf->mirrors[conf->raid_disks + number].rdev = NULL; |
1602 | lower_barrier(conf); | 1612 | unfreeze_array(conf); |
1603 | clear_bit(WantReplacement, &rdev->flags); | 1613 | clear_bit(WantReplacement, &rdev->flags); |
1604 | } else | 1614 | } else |
1605 | clear_bit(WantReplacement, &rdev->flags); | 1615 | clear_bit(WantReplacement, &rdev->flags); |
@@ -2195,7 +2205,7 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio) | |||
2195 | * frozen | 2205 | * frozen |
2196 | */ | 2206 | */ |
2197 | if (mddev->ro == 0) { | 2207 | if (mddev->ro == 0) { |
2198 | freeze_array(conf); | 2208 | freeze_array(conf, 1); |
2199 | fix_read_error(conf, r1_bio->read_disk, | 2209 | fix_read_error(conf, r1_bio->read_disk, |
2200 | r1_bio->sector, r1_bio->sectors); | 2210 | r1_bio->sector, r1_bio->sectors); |
2201 | unfreeze_array(conf); | 2211 | unfreeze_array(conf); |
@@ -2780,8 +2790,8 @@ static int run(struct mddev *mddev) | |||
2780 | return PTR_ERR(conf); | 2790 | return PTR_ERR(conf); |
2781 | 2791 | ||
2782 | if (mddev->queue) | 2792 | if (mddev->queue) |
2783 | blk_queue_max_write_same_sectors(mddev->queue, | 2793 | blk_queue_max_write_same_sectors(mddev->queue, 0); |
2784 | mddev->chunk_sectors); | 2794 | |
2785 | rdev_for_each(rdev, mddev) { | 2795 | rdev_for_each(rdev, mddev) { |
2786 | if (!mddev->gendisk) | 2796 | if (!mddev->gendisk) |
2787 | continue; | 2797 | continue; |
@@ -2963,7 +2973,7 @@ static int raid1_reshape(struct mddev *mddev) | |||
2963 | return -ENOMEM; | 2973 | return -ENOMEM; |
2964 | } | 2974 | } |
2965 | 2975 | ||
2966 | raise_barrier(conf); | 2976 | freeze_array(conf, 0); |
2967 | 2977 | ||
2968 | /* ok, everything is stopped */ | 2978 | /* ok, everything is stopped */ |
2969 | oldpool = conf->r1bio_pool; | 2979 | oldpool = conf->r1bio_pool; |
@@ -2994,7 +3004,7 @@ static int raid1_reshape(struct mddev *mddev) | |||
2994 | conf->raid_disks = mddev->raid_disks = raid_disks; | 3004 | conf->raid_disks = mddev->raid_disks = raid_disks; |
2995 | mddev->delta_disks = 0; | 3005 | mddev->delta_disks = 0; |
2996 | 3006 | ||
2997 | lower_barrier(conf); | 3007 | unfreeze_array(conf); |
2998 | 3008 | ||
2999 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 3009 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
3000 | md_wakeup_thread(mddev->thread); | 3010 | md_wakeup_thread(mddev->thread); |
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 59d4daa5f4c7..6ddae2501b9a 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -490,7 +490,17 @@ static void raid10_end_write_request(struct bio *bio, int error) | |||
490 | sector_t first_bad; | 490 | sector_t first_bad; |
491 | int bad_sectors; | 491 | int bad_sectors; |
492 | 492 | ||
493 | set_bit(R10BIO_Uptodate, &r10_bio->state); | 493 | /* |
494 | * Do not set R10BIO_Uptodate if the current device is | ||
495 | * rebuilding or Faulty. This is because we cannot use | ||
496 | * such device for properly reading the data back (we could | ||
497 | * potentially use it, if the current write would have felt | ||
498 | * before rdev->recovery_offset, but for simplicity we don't | ||
499 | * check this here. | ||
500 | */ | ||
501 | if (test_bit(In_sync, &rdev->flags) && | ||
502 | !test_bit(Faulty, &rdev->flags)) | ||
503 | set_bit(R10BIO_Uptodate, &r10_bio->state); | ||
494 | 504 | ||
495 | /* Maybe we can clear some bad blocks. */ | 505 | /* Maybe we can clear some bad blocks. */ |
496 | if (is_badblock(rdev, | 506 | if (is_badblock(rdev, |
@@ -1055,17 +1065,17 @@ static void allow_barrier(struct r10conf *conf) | |||
1055 | wake_up(&conf->wait_barrier); | 1065 | wake_up(&conf->wait_barrier); |
1056 | } | 1066 | } |
1057 | 1067 | ||
1058 | static void freeze_array(struct r10conf *conf) | 1068 | static void freeze_array(struct r10conf *conf, int extra) |
1059 | { | 1069 | { |
1060 | /* stop syncio and normal IO and wait for everything to | 1070 | /* stop syncio and normal IO and wait for everything to |
1061 | * go quiet. | 1071 | * go quiet. |
1062 | * We increment barrier and nr_waiting, and then | 1072 | * We increment barrier and nr_waiting, and then |
1063 | * wait until nr_pending match nr_queued+1 | 1073 | * wait until nr_pending match nr_queued+extra |
1064 | * This is called in the context of one normal IO request | 1074 | * This is called in the context of one normal IO request |
1065 | * that has failed. Thus any sync request that might be pending | 1075 | * that has failed. Thus any sync request that might be pending |
1066 | * will be blocked by nr_pending, and we need to wait for | 1076 | * will be blocked by nr_pending, and we need to wait for |
1067 | * pending IO requests to complete or be queued for re-try. | 1077 | * pending IO requests to complete or be queued for re-try. |
1068 | * Thus the number queued (nr_queued) plus this request (1) | 1078 | * Thus the number queued (nr_queued) plus this request (extra) |
1069 | * must match the number of pending IOs (nr_pending) before | 1079 | * must match the number of pending IOs (nr_pending) before |
1070 | * we continue. | 1080 | * we continue. |
1071 | */ | 1081 | */ |
@@ -1073,7 +1083,7 @@ static void freeze_array(struct r10conf *conf) | |||
1073 | conf->barrier++; | 1083 | conf->barrier++; |
1074 | conf->nr_waiting++; | 1084 | conf->nr_waiting++; |
1075 | wait_event_lock_irq_cmd(conf->wait_barrier, | 1085 | wait_event_lock_irq_cmd(conf->wait_barrier, |
1076 | conf->nr_pending == conf->nr_queued+1, | 1086 | conf->nr_pending == conf->nr_queued+extra, |
1077 | conf->resync_lock, | 1087 | conf->resync_lock, |
1078 | flush_pending_writes(conf)); | 1088 | flush_pending_writes(conf)); |
1079 | 1089 | ||
@@ -1837,8 +1847,8 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) | |||
1837 | * we wait for all outstanding requests to complete. | 1847 | * we wait for all outstanding requests to complete. |
1838 | */ | 1848 | */ |
1839 | synchronize_sched(); | 1849 | synchronize_sched(); |
1840 | raise_barrier(conf, 0); | 1850 | freeze_array(conf, 0); |
1841 | lower_barrier(conf); | 1851 | unfreeze_array(conf); |
1842 | clear_bit(Unmerged, &rdev->flags); | 1852 | clear_bit(Unmerged, &rdev->flags); |
1843 | } | 1853 | } |
1844 | md_integrity_add_rdev(rdev, mddev); | 1854 | md_integrity_add_rdev(rdev, mddev); |
@@ -2612,7 +2622,7 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio) | |||
2612 | r10_bio->devs[slot].bio = NULL; | 2622 | r10_bio->devs[slot].bio = NULL; |
2613 | 2623 | ||
2614 | if (mddev->ro == 0) { | 2624 | if (mddev->ro == 0) { |
2615 | freeze_array(conf); | 2625 | freeze_array(conf, 1); |
2616 | fix_read_error(conf, mddev, r10_bio); | 2626 | fix_read_error(conf, mddev, r10_bio); |
2617 | unfreeze_array(conf); | 2627 | unfreeze_array(conf); |
2618 | } else | 2628 | } else |
@@ -3609,8 +3619,7 @@ static int run(struct mddev *mddev) | |||
3609 | if (mddev->queue) { | 3619 | if (mddev->queue) { |
3610 | blk_queue_max_discard_sectors(mddev->queue, | 3620 | blk_queue_max_discard_sectors(mddev->queue, |
3611 | mddev->chunk_sectors); | 3621 | mddev->chunk_sectors); |
3612 | blk_queue_max_write_same_sectors(mddev->queue, | 3622 | blk_queue_max_write_same_sectors(mddev->queue, 0); |
3613 | mddev->chunk_sectors); | ||
3614 | blk_queue_io_min(mddev->queue, chunk_size); | 3623 | blk_queue_io_min(mddev->queue, chunk_size); |
3615 | if (conf->geo.raid_disks % conf->geo.near_copies) | 3624 | if (conf->geo.raid_disks % conf->geo.near_copies) |
3616 | blk_queue_io_opt(mddev->queue, chunk_size * conf->geo.raid_disks); | 3625 | blk_queue_io_opt(mddev->queue, chunk_size * conf->geo.raid_disks); |
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 753f318c8984..05e4a105b9c7 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -5466,7 +5466,7 @@ static int run(struct mddev *mddev) | |||
5466 | if (mddev->major_version == 0 && | 5466 | if (mddev->major_version == 0 && |
5467 | mddev->minor_version > 90) | 5467 | mddev->minor_version > 90) |
5468 | rdev->recovery_offset = reshape_offset; | 5468 | rdev->recovery_offset = reshape_offset; |
5469 | 5469 | ||
5470 | if (rdev->recovery_offset < reshape_offset) { | 5470 | if (rdev->recovery_offset < reshape_offset) { |
5471 | /* We need to check old and new layout */ | 5471 | /* We need to check old and new layout */ |
5472 | if (!only_parity(rdev->raid_disk, | 5472 | if (!only_parity(rdev->raid_disk, |
@@ -5589,6 +5589,8 @@ static int run(struct mddev *mddev) | |||
5589 | */ | 5589 | */ |
5590 | mddev->queue->limits.discard_zeroes_data = 0; | 5590 | mddev->queue->limits.discard_zeroes_data = 0; |
5591 | 5591 | ||
5592 | blk_queue_max_write_same_sectors(mddev->queue, 0); | ||
5593 | |||
5592 | rdev_for_each(rdev, mddev) { | 5594 | rdev_for_each(rdev, mddev) { |
5593 | disk_stack_limits(mddev->gendisk, rdev->bdev, | 5595 | disk_stack_limits(mddev->gendisk, rdev->bdev, |
5594 | rdev->data_offset << 9); | 5596 | rdev->data_offset << 9); |