diff options
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r-- | drivers/md/raid5.c | 116 |
1 files changed, 81 insertions, 35 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 3ee2912889e7..24909eb13fec 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -671,9 +671,11 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) | |||
671 | bi->bi_next = NULL; | 671 | bi->bi_next = NULL; |
672 | if (rrdev) | 672 | if (rrdev) |
673 | set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags); | 673 | set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags); |
674 | trace_block_bio_remap(bdev_get_queue(bi->bi_bdev), | 674 | |
675 | bi, disk_devt(conf->mddev->gendisk), | 675 | if (conf->mddev->gendisk) |
676 | sh->dev[i].sector); | 676 | trace_block_bio_remap(bdev_get_queue(bi->bi_bdev), |
677 | bi, disk_devt(conf->mddev->gendisk), | ||
678 | sh->dev[i].sector); | ||
677 | generic_make_request(bi); | 679 | generic_make_request(bi); |
678 | } | 680 | } |
679 | if (rrdev) { | 681 | if (rrdev) { |
@@ -701,9 +703,10 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) | |||
701 | rbi->bi_io_vec[0].bv_offset = 0; | 703 | rbi->bi_io_vec[0].bv_offset = 0; |
702 | rbi->bi_size = STRIPE_SIZE; | 704 | rbi->bi_size = STRIPE_SIZE; |
703 | rbi->bi_next = NULL; | 705 | rbi->bi_next = NULL; |
704 | trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev), | 706 | if (conf->mddev->gendisk) |
705 | rbi, disk_devt(conf->mddev->gendisk), | 707 | trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev), |
706 | sh->dev[i].sector); | 708 | rbi, disk_devt(conf->mddev->gendisk), |
709 | sh->dev[i].sector); | ||
707 | generic_make_request(rbi); | 710 | generic_make_request(rbi); |
708 | } | 711 | } |
709 | if (!rdev && !rrdev) { | 712 | if (!rdev && !rrdev) { |
@@ -2280,17 +2283,6 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s, | |||
2280 | int level = conf->level; | 2283 | int level = conf->level; |
2281 | 2284 | ||
2282 | if (rcw) { | 2285 | if (rcw) { |
2283 | /* if we are not expanding this is a proper write request, and | ||
2284 | * there will be bios with new data to be drained into the | ||
2285 | * stripe cache | ||
2286 | */ | ||
2287 | if (!expand) { | ||
2288 | sh->reconstruct_state = reconstruct_state_drain_run; | ||
2289 | set_bit(STRIPE_OP_BIODRAIN, &s->ops_request); | ||
2290 | } else | ||
2291 | sh->reconstruct_state = reconstruct_state_run; | ||
2292 | |||
2293 | set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request); | ||
2294 | 2286 | ||
2295 | for (i = disks; i--; ) { | 2287 | for (i = disks; i--; ) { |
2296 | struct r5dev *dev = &sh->dev[i]; | 2288 | struct r5dev *dev = &sh->dev[i]; |
@@ -2303,6 +2295,21 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s, | |||
2303 | s->locked++; | 2295 | s->locked++; |
2304 | } | 2296 | } |
2305 | } | 2297 | } |
2298 | /* if we are not expanding this is a proper write request, and | ||
2299 | * there will be bios with new data to be drained into the | ||
2300 | * stripe cache | ||
2301 | */ | ||
2302 | if (!expand) { | ||
2303 | if (!s->locked) | ||
2304 | /* False alarm, nothing to do */ | ||
2305 | return; | ||
2306 | sh->reconstruct_state = reconstruct_state_drain_run; | ||
2307 | set_bit(STRIPE_OP_BIODRAIN, &s->ops_request); | ||
2308 | } else | ||
2309 | sh->reconstruct_state = reconstruct_state_run; | ||
2310 | |||
2311 | set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request); | ||
2312 | |||
2306 | if (s->locked + conf->max_degraded == disks) | 2313 | if (s->locked + conf->max_degraded == disks) |
2307 | if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state)) | 2314 | if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state)) |
2308 | atomic_inc(&conf->pending_full_writes); | 2315 | atomic_inc(&conf->pending_full_writes); |
@@ -2311,11 +2318,6 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s, | |||
2311 | BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) || | 2318 | BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) || |
2312 | test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags))); | 2319 | test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags))); |
2313 | 2320 | ||
2314 | sh->reconstruct_state = reconstruct_state_prexor_drain_run; | ||
2315 | set_bit(STRIPE_OP_PREXOR, &s->ops_request); | ||
2316 | set_bit(STRIPE_OP_BIODRAIN, &s->ops_request); | ||
2317 | set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request); | ||
2318 | |||
2319 | for (i = disks; i--; ) { | 2321 | for (i = disks; i--; ) { |
2320 | struct r5dev *dev = &sh->dev[i]; | 2322 | struct r5dev *dev = &sh->dev[i]; |
2321 | if (i == pd_idx) | 2323 | if (i == pd_idx) |
@@ -2330,6 +2332,13 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s, | |||
2330 | s->locked++; | 2332 | s->locked++; |
2331 | } | 2333 | } |
2332 | } | 2334 | } |
2335 | if (!s->locked) | ||
2336 | /* False alarm - nothing to do */ | ||
2337 | return; | ||
2338 | sh->reconstruct_state = reconstruct_state_prexor_drain_run; | ||
2339 | set_bit(STRIPE_OP_PREXOR, &s->ops_request); | ||
2340 | set_bit(STRIPE_OP_BIODRAIN, &s->ops_request); | ||
2341 | set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request); | ||
2333 | } | 2342 | } |
2334 | 2343 | ||
2335 | /* keep the parity disk(s) locked while asynchronous operations | 2344 | /* keep the parity disk(s) locked while asynchronous operations |
@@ -2564,6 +2573,8 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh, | |||
2564 | int i; | 2573 | int i; |
2565 | 2574 | ||
2566 | clear_bit(STRIPE_SYNCING, &sh->state); | 2575 | clear_bit(STRIPE_SYNCING, &sh->state); |
2576 | if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags)) | ||
2577 | wake_up(&conf->wait_for_overlap); | ||
2567 | s->syncing = 0; | 2578 | s->syncing = 0; |
2568 | s->replacing = 0; | 2579 | s->replacing = 0; |
2569 | /* There is nothing more to do for sync/check/repair. | 2580 | /* There is nothing more to do for sync/check/repair. |
@@ -2737,6 +2748,7 @@ static void handle_stripe_clean_event(struct r5conf *conf, | |||
2737 | { | 2748 | { |
2738 | int i; | 2749 | int i; |
2739 | struct r5dev *dev; | 2750 | struct r5dev *dev; |
2751 | int discard_pending = 0; | ||
2740 | 2752 | ||
2741 | for (i = disks; i--; ) | 2753 | for (i = disks; i--; ) |
2742 | if (sh->dev[i].written) { | 2754 | if (sh->dev[i].written) { |
@@ -2765,9 +2777,23 @@ static void handle_stripe_clean_event(struct r5conf *conf, | |||
2765 | STRIPE_SECTORS, | 2777 | STRIPE_SECTORS, |
2766 | !test_bit(STRIPE_DEGRADED, &sh->state), | 2778 | !test_bit(STRIPE_DEGRADED, &sh->state), |
2767 | 0); | 2779 | 0); |
2768 | } | 2780 | } else if (test_bit(R5_Discard, &dev->flags)) |
2769 | } else if (test_bit(R5_Discard, &sh->dev[i].flags)) | 2781 | discard_pending = 1; |
2770 | clear_bit(R5_Discard, &sh->dev[i].flags); | 2782 | } |
2783 | if (!discard_pending && | ||
2784 | test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) { | ||
2785 | clear_bit(R5_Discard, &sh->dev[sh->pd_idx].flags); | ||
2786 | clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags); | ||
2787 | if (sh->qd_idx >= 0) { | ||
2788 | clear_bit(R5_Discard, &sh->dev[sh->qd_idx].flags); | ||
2789 | clear_bit(R5_UPTODATE, &sh->dev[sh->qd_idx].flags); | ||
2790 | } | ||
2791 | /* now that discard is done we can proceed with any sync */ | ||
2792 | clear_bit(STRIPE_DISCARD, &sh->state); | ||
2793 | if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state)) | ||
2794 | set_bit(STRIPE_HANDLE, &sh->state); | ||
2795 | |||
2796 | } | ||
2771 | 2797 | ||
2772 | if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state)) | 2798 | if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state)) |
2773 | if (atomic_dec_and_test(&conf->pending_full_writes)) | 2799 | if (atomic_dec_and_test(&conf->pending_full_writes)) |
@@ -2826,8 +2852,10 @@ static void handle_stripe_dirtying(struct r5conf *conf, | |||
2826 | set_bit(STRIPE_HANDLE, &sh->state); | 2852 | set_bit(STRIPE_HANDLE, &sh->state); |
2827 | if (rmw < rcw && rmw > 0) { | 2853 | if (rmw < rcw && rmw > 0) { |
2828 | /* prefer read-modify-write, but need to get some data */ | 2854 | /* prefer read-modify-write, but need to get some data */ |
2829 | blk_add_trace_msg(conf->mddev->queue, "raid5 rmw %llu %d", | 2855 | if (conf->mddev->queue) |
2830 | (unsigned long long)sh->sector, rmw); | 2856 | blk_add_trace_msg(conf->mddev->queue, |
2857 | "raid5 rmw %llu %d", | ||
2858 | (unsigned long long)sh->sector, rmw); | ||
2831 | for (i = disks; i--; ) { | 2859 | for (i = disks; i--; ) { |
2832 | struct r5dev *dev = &sh->dev[i]; | 2860 | struct r5dev *dev = &sh->dev[i]; |
2833 | if ((dev->towrite || i == sh->pd_idx) && | 2861 | if ((dev->towrite || i == sh->pd_idx) && |
@@ -2877,7 +2905,7 @@ static void handle_stripe_dirtying(struct r5conf *conf, | |||
2877 | } | 2905 | } |
2878 | } | 2906 | } |
2879 | } | 2907 | } |
2880 | if (rcw) | 2908 | if (rcw && conf->mddev->queue) |
2881 | blk_add_trace_msg(conf->mddev->queue, "raid5 rcw %llu %d %d %d", | 2909 | blk_add_trace_msg(conf->mddev->queue, "raid5 rcw %llu %d %d %d", |
2882 | (unsigned long long)sh->sector, | 2910 | (unsigned long long)sh->sector, |
2883 | rcw, qread, test_bit(STRIPE_DELAYED, &sh->state)); | 2911 | rcw, qread, test_bit(STRIPE_DELAYED, &sh->state)); |
@@ -3417,9 +3445,15 @@ static void handle_stripe(struct stripe_head *sh) | |||
3417 | return; | 3445 | return; |
3418 | } | 3446 | } |
3419 | 3447 | ||
3420 | if (test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) { | 3448 | if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state)) { |
3421 | set_bit(STRIPE_SYNCING, &sh->state); | 3449 | spin_lock(&sh->stripe_lock); |
3422 | clear_bit(STRIPE_INSYNC, &sh->state); | 3450 | /* Cannot process 'sync' concurrently with 'discard' */ |
3451 | if (!test_bit(STRIPE_DISCARD, &sh->state) && | ||
3452 | test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) { | ||
3453 | set_bit(STRIPE_SYNCING, &sh->state); | ||
3454 | clear_bit(STRIPE_INSYNC, &sh->state); | ||
3455 | } | ||
3456 | spin_unlock(&sh->stripe_lock); | ||
3423 | } | 3457 | } |
3424 | clear_bit(STRIPE_DELAYED, &sh->state); | 3458 | clear_bit(STRIPE_DELAYED, &sh->state); |
3425 | 3459 | ||
@@ -3579,6 +3613,8 @@ static void handle_stripe(struct stripe_head *sh) | |||
3579 | test_bit(STRIPE_INSYNC, &sh->state)) { | 3613 | test_bit(STRIPE_INSYNC, &sh->state)) { |
3580 | md_done_sync(conf->mddev, STRIPE_SECTORS, 1); | 3614 | md_done_sync(conf->mddev, STRIPE_SECTORS, 1); |
3581 | clear_bit(STRIPE_SYNCING, &sh->state); | 3615 | clear_bit(STRIPE_SYNCING, &sh->state); |
3616 | if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags)) | ||
3617 | wake_up(&conf->wait_for_overlap); | ||
3582 | } | 3618 | } |
3583 | 3619 | ||
3584 | /* If the failed drives are just a ReadError, then we might need | 3620 | /* If the failed drives are just a ReadError, then we might need |
@@ -3982,9 +4018,10 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio) | |||
3982 | atomic_inc(&conf->active_aligned_reads); | 4018 | atomic_inc(&conf->active_aligned_reads); |
3983 | spin_unlock_irq(&conf->device_lock); | 4019 | spin_unlock_irq(&conf->device_lock); |
3984 | 4020 | ||
3985 | trace_block_bio_remap(bdev_get_queue(align_bi->bi_bdev), | 4021 | if (mddev->gendisk) |
3986 | align_bi, disk_devt(mddev->gendisk), | 4022 | trace_block_bio_remap(bdev_get_queue(align_bi->bi_bdev), |
3987 | raid_bio->bi_sector); | 4023 | align_bi, disk_devt(mddev->gendisk), |
4024 | raid_bio->bi_sector); | ||
3988 | generic_make_request(align_bi); | 4025 | generic_make_request(align_bi); |
3989 | return 1; | 4026 | return 1; |
3990 | } else { | 4027 | } else { |
@@ -4078,7 +4115,8 @@ static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule) | |||
4078 | } | 4115 | } |
4079 | spin_unlock_irq(&conf->device_lock); | 4116 | spin_unlock_irq(&conf->device_lock); |
4080 | } | 4117 | } |
4081 | trace_block_unplug(mddev->queue, cnt, !from_schedule); | 4118 | if (mddev->queue) |
4119 | trace_block_unplug(mddev->queue, cnt, !from_schedule); | ||
4082 | kfree(cb); | 4120 | kfree(cb); |
4083 | } | 4121 | } |
4084 | 4122 | ||
@@ -4141,6 +4179,13 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi) | |||
4141 | sh = get_active_stripe(conf, logical_sector, 0, 0, 0); | 4179 | sh = get_active_stripe(conf, logical_sector, 0, 0, 0); |
4142 | prepare_to_wait(&conf->wait_for_overlap, &w, | 4180 | prepare_to_wait(&conf->wait_for_overlap, &w, |
4143 | TASK_UNINTERRUPTIBLE); | 4181 | TASK_UNINTERRUPTIBLE); |
4182 | set_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags); | ||
4183 | if (test_bit(STRIPE_SYNCING, &sh->state)) { | ||
4184 | release_stripe(sh); | ||
4185 | schedule(); | ||
4186 | goto again; | ||
4187 | } | ||
4188 | clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags); | ||
4144 | spin_lock_irq(&sh->stripe_lock); | 4189 | spin_lock_irq(&sh->stripe_lock); |
4145 | for (d = 0; d < conf->raid_disks; d++) { | 4190 | for (d = 0; d < conf->raid_disks; d++) { |
4146 | if (d == sh->pd_idx || d == sh->qd_idx) | 4191 | if (d == sh->pd_idx || d == sh->qd_idx) |
@@ -4153,6 +4198,7 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi) | |||
4153 | goto again; | 4198 | goto again; |
4154 | } | 4199 | } |
4155 | } | 4200 | } |
4201 | set_bit(STRIPE_DISCARD, &sh->state); | ||
4156 | finish_wait(&conf->wait_for_overlap, &w); | 4202 | finish_wait(&conf->wait_for_overlap, &w); |
4157 | for (d = 0; d < conf->raid_disks; d++) { | 4203 | for (d = 0; d < conf->raid_disks; d++) { |
4158 | if (d == sh->pd_idx || d == sh->qd_idx) | 4204 | if (d == sh->pd_idx || d == sh->qd_idx) |