Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r--  drivers/md/raid5.c  116
1 file changed, 81 insertions, 35 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 3ee2912889e7..24909eb13fec 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -671,9 +671,11 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 			bi->bi_next = NULL;
 			if (rrdev)
 				set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags);
-			trace_block_bio_remap(bdev_get_queue(bi->bi_bdev),
-					      bi, disk_devt(conf->mddev->gendisk),
-					      sh->dev[i].sector);
+
+			if (conf->mddev->gendisk)
+				trace_block_bio_remap(bdev_get_queue(bi->bi_bdev),
+						      bi, disk_devt(conf->mddev->gendisk),
+						      sh->dev[i].sector);
 			generic_make_request(bi);
 		}
 		if (rrdev) {
@@ -701,9 +703,10 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 			rbi->bi_io_vec[0].bv_offset = 0;
 			rbi->bi_size = STRIPE_SIZE;
 			rbi->bi_next = NULL;
-			trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev),
-					      rbi, disk_devt(conf->mddev->gendisk),
-					      sh->dev[i].sector);
+			if (conf->mddev->gendisk)
+				trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev),
+						      rbi, disk_devt(conf->mddev->gendisk),
+						      sh->dev[i].sector);
 			generic_make_request(rbi);
 		}
 		if (!rdev && !rrdev) {
@@ -2280,17 +2283,6 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
 	int level = conf->level;
 
 	if (rcw) {
-		/* if we are not expanding this is a proper write request, and
-		 * there will be bios with new data to be drained into the
-		 * stripe cache
-		 */
-		if (!expand) {
-			sh->reconstruct_state = reconstruct_state_drain_run;
-			set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
-		} else
-			sh->reconstruct_state = reconstruct_state_run;
-
-		set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
 
 		for (i = disks; i--; ) {
 			struct r5dev *dev = &sh->dev[i];
@@ -2303,6 +2295,21 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
 				s->locked++;
 			}
 		}
+		/* if we are not expanding this is a proper write request, and
+		 * there will be bios with new data to be drained into the
+		 * stripe cache
+		 */
+		if (!expand) {
+			if (!s->locked)
+				/* False alarm, nothing to do */
+				return;
+			sh->reconstruct_state = reconstruct_state_drain_run;
+			set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
+		} else
+			sh->reconstruct_state = reconstruct_state_run;
+
+		set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
+
 		if (s->locked + conf->max_degraded == disks)
 			if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
 				atomic_inc(&conf->pending_full_writes);
@@ -2311,11 +2318,6 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
 		BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) ||
 			 test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags)));
 
-		sh->reconstruct_state = reconstruct_state_prexor_drain_run;
-		set_bit(STRIPE_OP_PREXOR, &s->ops_request);
-		set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
-		set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
-
 		for (i = disks; i--; ) {
 			struct r5dev *dev = &sh->dev[i];
 			if (i == pd_idx)
@@ -2330,6 +2332,13 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
 				s->locked++;
 			}
 		}
+		if (!s->locked)
+			/* False alarm - nothing to do */
+			return;
+		sh->reconstruct_state = reconstruct_state_prexor_drain_run;
+		set_bit(STRIPE_OP_PREXOR, &s->ops_request);
+		set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
+		set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
 	}
 
 	/* keep the parity disk(s) locked while asynchronous operations
@@ -2564,6 +2573,8 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh,
 	int i;
 
 	clear_bit(STRIPE_SYNCING, &sh->state);
+	if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags))
+		wake_up(&conf->wait_for_overlap);
 	s->syncing = 0;
 	s->replacing = 0;
 	/* There is nothing more to do for sync/check/repair.
@@ -2737,6 +2748,7 @@ static void handle_stripe_clean_event(struct r5conf *conf,
 {
 	int i;
 	struct r5dev *dev;
+	int discard_pending = 0;
 
 	for (i = disks; i--; )
 		if (sh->dev[i].written) {
@@ -2765,9 +2777,23 @@ static void handle_stripe_clean_event(struct r5conf *conf,
 						STRIPE_SECTORS,
 					 !test_bit(STRIPE_DEGRADED, &sh->state),
 						0);
-			}
-		} else if (test_bit(R5_Discard, &sh->dev[i].flags))
-			clear_bit(R5_Discard, &sh->dev[i].flags);
+			} else if (test_bit(R5_Discard, &dev->flags))
+				discard_pending = 1;
+		}
+	if (!discard_pending &&
+	    test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) {
+		clear_bit(R5_Discard, &sh->dev[sh->pd_idx].flags);
+		clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags);
+		if (sh->qd_idx >= 0) {
+			clear_bit(R5_Discard, &sh->dev[sh->qd_idx].flags);
+			clear_bit(R5_UPTODATE, &sh->dev[sh->qd_idx].flags);
+		}
+		/* now that discard is done we can proceed with any sync */
+		clear_bit(STRIPE_DISCARD, &sh->state);
+		if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state))
+			set_bit(STRIPE_HANDLE, &sh->state);
+
+	}
 
 	if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state))
 		if (atomic_dec_and_test(&conf->pending_full_writes))
@@ -2826,8 +2852,10 @@ static void handle_stripe_dirtying(struct r5conf *conf,
 	set_bit(STRIPE_HANDLE, &sh->state);
 	if (rmw < rcw && rmw > 0) {
 		/* prefer read-modify-write, but need to get some data */
-		blk_add_trace_msg(conf->mddev->queue, "raid5 rmw %llu %d",
-				  (unsigned long long)sh->sector, rmw);
+		if (conf->mddev->queue)
+			blk_add_trace_msg(conf->mddev->queue,
+					  "raid5 rmw %llu %d",
+					  (unsigned long long)sh->sector, rmw);
 		for (i = disks; i--; ) {
 			struct r5dev *dev = &sh->dev[i];
 			if ((dev->towrite || i == sh->pd_idx) &&
@@ -2877,7 +2905,7 @@ static void handle_stripe_dirtying(struct r5conf *conf,
 			}
 		}
 	}
-	if (rcw)
+	if (rcw && conf->mddev->queue)
 		blk_add_trace_msg(conf->mddev->queue, "raid5 rcw %llu %d %d %d",
 			(unsigned long long)sh->sector,
 			rcw, qread, test_bit(STRIPE_DELAYED, &sh->state));
@@ -3417,9 +3445,15 @@ static void handle_stripe(struct stripe_head *sh)
 		return;
 	}
 
-	if (test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
-		set_bit(STRIPE_SYNCING, &sh->state);
-		clear_bit(STRIPE_INSYNC, &sh->state);
+	if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
+		spin_lock(&sh->stripe_lock);
+		/* Cannot process 'sync' concurrently with 'discard' */
+		if (!test_bit(STRIPE_DISCARD, &sh->state) &&
+		    test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
+			set_bit(STRIPE_SYNCING, &sh->state);
+			clear_bit(STRIPE_INSYNC, &sh->state);
+		}
+		spin_unlock(&sh->stripe_lock);
 	}
 	clear_bit(STRIPE_DELAYED, &sh->state);
 
@@ -3579,6 +3613,8 @@ static void handle_stripe(struct stripe_head *sh)
 	    test_bit(STRIPE_INSYNC, &sh->state)) {
 		md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
 		clear_bit(STRIPE_SYNCING, &sh->state);
+		if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags))
+			wake_up(&conf->wait_for_overlap);
 	}
 
 	/* If the failed drives are just a ReadError, then we might need
@@ -3982,9 +4018,10 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
 		atomic_inc(&conf->active_aligned_reads);
 		spin_unlock_irq(&conf->device_lock);
 
-		trace_block_bio_remap(bdev_get_queue(align_bi->bi_bdev),
-				      align_bi, disk_devt(mddev->gendisk),
-				      raid_bio->bi_sector);
+		if (mddev->gendisk)
+			trace_block_bio_remap(bdev_get_queue(align_bi->bi_bdev),
+					      align_bi, disk_devt(mddev->gendisk),
+					      raid_bio->bi_sector);
 		generic_make_request(align_bi);
 		return 1;
 	} else {
@@ -4078,7 +4115,8 @@ static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule)
 		}
 		spin_unlock_irq(&conf->device_lock);
 	}
-	trace_block_unplug(mddev->queue, cnt, !from_schedule);
+	if (mddev->queue)
+		trace_block_unplug(mddev->queue, cnt, !from_schedule);
 	kfree(cb);
 }
 
@@ -4141,6 +4179,13 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
 		sh = get_active_stripe(conf, logical_sector, 0, 0, 0);
 		prepare_to_wait(&conf->wait_for_overlap, &w,
 				TASK_UNINTERRUPTIBLE);
+		set_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags);
+		if (test_bit(STRIPE_SYNCING, &sh->state)) {
+			release_stripe(sh);
+			schedule();
+			goto again;
+		}
+		clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags);
 		spin_lock_irq(&sh->stripe_lock);
 		for (d = 0; d < conf->raid_disks; d++) {
 			if (d == sh->pd_idx || d == sh->qd_idx)
@@ -4153,6 +4198,7 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
 				goto again;
 			}
 		}
+		set_bit(STRIPE_DISCARD, &sh->state);
 		finish_wait(&conf->wait_for_overlap, &w);
 		for (d = 0; d < conf->raid_disks; d++) {
 			if (d == sh->pd_idx || d == sh->qd_idx)
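
For reference, a minimal userspace sketch (not part of the patch; the names stripe_state and try_start_sync are invented for illustration) of the exclusion pattern the handle_stripe() and make_discard_request() hunks introduce: a sync may only begin once no discard is pending on the stripe, and that decision is made under the stripe lock.

/*
 * Illustrative sketch only, compiled with -pthread. It mimics in userspace
 * the check-under-lock pattern added above: STRIPE_SYNC_REQUESTED is only
 * promoted to STRIPE_SYNCING when STRIPE_DISCARD is not set, and both bits
 * are examined while holding sh->stripe_lock.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct stripe_state {
	pthread_mutex_t lock;		/* stands in for sh->stripe_lock */
	bool discard_pending;		/* analogous to STRIPE_DISCARD */
	bool sync_requested;		/* analogous to STRIPE_SYNC_REQUESTED */
	bool syncing;			/* analogous to STRIPE_SYNCING */
};

/* Start a sync only if no discard is in flight; the whole decision is
 * made under the lock so a concurrent discard cannot slip in between. */
static bool try_start_sync(struct stripe_state *st)
{
	bool started = false;

	pthread_mutex_lock(&st->lock);
	if (!st->discard_pending && st->sync_requested) {
		st->sync_requested = false;
		st->syncing = true;
		started = true;
	}
	pthread_mutex_unlock(&st->lock);
	return started;
}

int main(void)
{
	struct stripe_state st = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.discard_pending = true,	/* a discard is outstanding */
		.sync_requested = true,
	};

	printf("sync started: %d\n", try_start_sync(&st));	/* 0: blocked */
	st.discard_pending = false;				/* discard done */
	printf("sync started: %d\n", try_start_sync(&st));	/* 1: proceeds */
	return 0;
}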