aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-03-23 18:49:49 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2013-03-23 18:49:49 -0400
commit22c3f2fff68abf1ccf88e1a72f61bfede7b91da0 (patch)
tree804affa383e0898c7df60e4b00b6edee8fca1de1
parentc8c1f16717b05a9f787e054ada1ddd6dbb910364 (diff)
parent238f5908bd48f9e2f4668e0289e88cba969d710c (diff)
Merge tag 'md-3.9-fixes' of git://neil.brown.name/md
Pull md fixes from NeilBrown: "A few bugfixes for md - recent regressions in raid5 - recent regressions in dmraid - a few instances of CONFIG_MULTICORE_RAID456 linger Several tagged for -stable" * tag 'md-3.9-fixes' of git://neil.brown.name/md: md: remove CONFIG_MULTICORE_RAID456 entirely md/raid5: ensure sync and DISCARD don't happen at the same time. MD: Prevent sysfs operations on uninitialized kobjects MD RAID5: Avoid accessing gendisk or queue structs when not available md/raid5: schedule_construction should abort if nothing to do.
-rw-r--r--arch/tile/configs/tilegx_defconfig1
-rw-r--r--arch/tile/configs/tilepro_defconfig1
-rw-r--r--drivers/md/md.c6
-rw-r--r--drivers/md/md.h4
-rw-r--r--drivers/md/raid5.c116
-rw-r--r--drivers/md/raid5.h5
6 files changed, 86 insertions, 47 deletions
diff --git a/arch/tile/configs/tilegx_defconfig b/arch/tile/configs/tilegx_defconfig
index 8c5eff6d6df5..47684815e5c8 100644
--- a/arch/tile/configs/tilegx_defconfig
+++ b/arch/tile/configs/tilegx_defconfig
@@ -330,7 +330,6 @@ CONFIG_MD_RAID0=m
330CONFIG_MD_RAID1=m 330CONFIG_MD_RAID1=m
331CONFIG_MD_RAID10=m 331CONFIG_MD_RAID10=m
332CONFIG_MD_RAID456=m 332CONFIG_MD_RAID456=m
333CONFIG_MULTICORE_RAID456=y
334CONFIG_MD_FAULTY=m 333CONFIG_MD_FAULTY=m
335CONFIG_BLK_DEV_DM=m 334CONFIG_BLK_DEV_DM=m
336CONFIG_DM_DEBUG=y 335CONFIG_DM_DEBUG=y
diff --git a/arch/tile/configs/tilepro_defconfig b/arch/tile/configs/tilepro_defconfig
index e7a3dfcbcda7..dd2b8f0c631f 100644
--- a/arch/tile/configs/tilepro_defconfig
+++ b/arch/tile/configs/tilepro_defconfig
@@ -324,7 +324,6 @@ CONFIG_MD_RAID0=m
324CONFIG_MD_RAID1=m 324CONFIG_MD_RAID1=m
325CONFIG_MD_RAID10=m 325CONFIG_MD_RAID10=m
326CONFIG_MD_RAID456=m 326CONFIG_MD_RAID456=m
327CONFIG_MULTICORE_RAID456=y
328CONFIG_MD_FAULTY=m 327CONFIG_MD_FAULTY=m
329CONFIG_BLK_DEV_DM=m 328CONFIG_BLK_DEV_DM=m
330CONFIG_DM_DEBUG=y 329CONFIG_DM_DEBUG=y
diff --git a/drivers/md/md.c b/drivers/md/md.c
index fcb878f88796..aeceedfc530b 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -7663,10 +7663,8 @@ static int remove_and_add_spares(struct mddev *mddev)
7663 removed++; 7663 removed++;
7664 } 7664 }
7665 } 7665 }
7666 if (removed) 7666 if (removed && mddev->kobj.sd)
7667 sysfs_notify(&mddev->kobj, NULL, 7667 sysfs_notify(&mddev->kobj, NULL, "degraded");
7668 "degraded");
7669
7670 7668
7671 rdev_for_each(rdev, mddev) { 7669 rdev_for_each(rdev, mddev) {
7672 if (rdev->raid_disk >= 0 && 7670 if (rdev->raid_disk >= 0 &&
diff --git a/drivers/md/md.h b/drivers/md/md.h
index eca59c3074ef..d90fb1a879e1 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -506,7 +506,7 @@ static inline char * mdname (struct mddev * mddev)
506static inline int sysfs_link_rdev(struct mddev *mddev, struct md_rdev *rdev) 506static inline int sysfs_link_rdev(struct mddev *mddev, struct md_rdev *rdev)
507{ 507{
508 char nm[20]; 508 char nm[20];
509 if (!test_bit(Replacement, &rdev->flags)) { 509 if (!test_bit(Replacement, &rdev->flags) && mddev->kobj.sd) {
510 sprintf(nm, "rd%d", rdev->raid_disk); 510 sprintf(nm, "rd%d", rdev->raid_disk);
511 return sysfs_create_link(&mddev->kobj, &rdev->kobj, nm); 511 return sysfs_create_link(&mddev->kobj, &rdev->kobj, nm);
512 } else 512 } else
@@ -516,7 +516,7 @@ static inline int sysfs_link_rdev(struct mddev *mddev, struct md_rdev *rdev)
516static inline void sysfs_unlink_rdev(struct mddev *mddev, struct md_rdev *rdev) 516static inline void sysfs_unlink_rdev(struct mddev *mddev, struct md_rdev *rdev)
517{ 517{
518 char nm[20]; 518 char nm[20];
519 if (!test_bit(Replacement, &rdev->flags)) { 519 if (!test_bit(Replacement, &rdev->flags) && mddev->kobj.sd) {
520 sprintf(nm, "rd%d", rdev->raid_disk); 520 sprintf(nm, "rd%d", rdev->raid_disk);
521 sysfs_remove_link(&mddev->kobj, nm); 521 sysfs_remove_link(&mddev->kobj, nm);
522 } 522 }
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 3ee2912889e7..24909eb13fec 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -671,9 +671,11 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
671 bi->bi_next = NULL; 671 bi->bi_next = NULL;
672 if (rrdev) 672 if (rrdev)
673 set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags); 673 set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags);
674 trace_block_bio_remap(bdev_get_queue(bi->bi_bdev), 674
675 bi, disk_devt(conf->mddev->gendisk), 675 if (conf->mddev->gendisk)
676 sh->dev[i].sector); 676 trace_block_bio_remap(bdev_get_queue(bi->bi_bdev),
677 bi, disk_devt(conf->mddev->gendisk),
678 sh->dev[i].sector);
677 generic_make_request(bi); 679 generic_make_request(bi);
678 } 680 }
679 if (rrdev) { 681 if (rrdev) {
@@ -701,9 +703,10 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
701 rbi->bi_io_vec[0].bv_offset = 0; 703 rbi->bi_io_vec[0].bv_offset = 0;
702 rbi->bi_size = STRIPE_SIZE; 704 rbi->bi_size = STRIPE_SIZE;
703 rbi->bi_next = NULL; 705 rbi->bi_next = NULL;
704 trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev), 706 if (conf->mddev->gendisk)
705 rbi, disk_devt(conf->mddev->gendisk), 707 trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev),
706 sh->dev[i].sector); 708 rbi, disk_devt(conf->mddev->gendisk),
709 sh->dev[i].sector);
707 generic_make_request(rbi); 710 generic_make_request(rbi);
708 } 711 }
709 if (!rdev && !rrdev) { 712 if (!rdev && !rrdev) {
@@ -2280,17 +2283,6 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
2280 int level = conf->level; 2283 int level = conf->level;
2281 2284
2282 if (rcw) { 2285 if (rcw) {
2283 /* if we are not expanding this is a proper write request, and
2284 * there will be bios with new data to be drained into the
2285 * stripe cache
2286 */
2287 if (!expand) {
2288 sh->reconstruct_state = reconstruct_state_drain_run;
2289 set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
2290 } else
2291 sh->reconstruct_state = reconstruct_state_run;
2292
2293 set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
2294 2286
2295 for (i = disks; i--; ) { 2287 for (i = disks; i--; ) {
2296 struct r5dev *dev = &sh->dev[i]; 2288 struct r5dev *dev = &sh->dev[i];
@@ -2303,6 +2295,21 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
2303 s->locked++; 2295 s->locked++;
2304 } 2296 }
2305 } 2297 }
2298 /* if we are not expanding this is a proper write request, and
2299 * there will be bios with new data to be drained into the
2300 * stripe cache
2301 */
2302 if (!expand) {
2303 if (!s->locked)
2304 /* False alarm, nothing to do */
2305 return;
2306 sh->reconstruct_state = reconstruct_state_drain_run;
2307 set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
2308 } else
2309 sh->reconstruct_state = reconstruct_state_run;
2310
2311 set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
2312
2306 if (s->locked + conf->max_degraded == disks) 2313 if (s->locked + conf->max_degraded == disks)
2307 if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state)) 2314 if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
2308 atomic_inc(&conf->pending_full_writes); 2315 atomic_inc(&conf->pending_full_writes);
@@ -2311,11 +2318,6 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
2311 BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) || 2318 BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) ||
2312 test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags))); 2319 test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags)));
2313 2320
2314 sh->reconstruct_state = reconstruct_state_prexor_drain_run;
2315 set_bit(STRIPE_OP_PREXOR, &s->ops_request);
2316 set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
2317 set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
2318
2319 for (i = disks; i--; ) { 2321 for (i = disks; i--; ) {
2320 struct r5dev *dev = &sh->dev[i]; 2322 struct r5dev *dev = &sh->dev[i];
2321 if (i == pd_idx) 2323 if (i == pd_idx)
@@ -2330,6 +2332,13 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
2330 s->locked++; 2332 s->locked++;
2331 } 2333 }
2332 } 2334 }
2335 if (!s->locked)
2336 /* False alarm - nothing to do */
2337 return;
2338 sh->reconstruct_state = reconstruct_state_prexor_drain_run;
2339 set_bit(STRIPE_OP_PREXOR, &s->ops_request);
2340 set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
2341 set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
2333 } 2342 }
2334 2343
2335 /* keep the parity disk(s) locked while asynchronous operations 2344 /* keep the parity disk(s) locked while asynchronous operations
@@ -2564,6 +2573,8 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh,
2564 int i; 2573 int i;
2565 2574
2566 clear_bit(STRIPE_SYNCING, &sh->state); 2575 clear_bit(STRIPE_SYNCING, &sh->state);
2576 if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags))
2577 wake_up(&conf->wait_for_overlap);
2567 s->syncing = 0; 2578 s->syncing = 0;
2568 s->replacing = 0; 2579 s->replacing = 0;
2569 /* There is nothing more to do for sync/check/repair. 2580 /* There is nothing more to do for sync/check/repair.
@@ -2737,6 +2748,7 @@ static void handle_stripe_clean_event(struct r5conf *conf,
2737{ 2748{
2738 int i; 2749 int i;
2739 struct r5dev *dev; 2750 struct r5dev *dev;
2751 int discard_pending = 0;
2740 2752
2741 for (i = disks; i--; ) 2753 for (i = disks; i--; )
2742 if (sh->dev[i].written) { 2754 if (sh->dev[i].written) {
@@ -2765,9 +2777,23 @@ static void handle_stripe_clean_event(struct r5conf *conf,
2765 STRIPE_SECTORS, 2777 STRIPE_SECTORS,
2766 !test_bit(STRIPE_DEGRADED, &sh->state), 2778 !test_bit(STRIPE_DEGRADED, &sh->state),
2767 0); 2779 0);
2768 } 2780 } else if (test_bit(R5_Discard, &dev->flags))
2769 } else if (test_bit(R5_Discard, &sh->dev[i].flags)) 2781 discard_pending = 1;
2770 clear_bit(R5_Discard, &sh->dev[i].flags); 2782 }
2783 if (!discard_pending &&
2784 test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) {
2785 clear_bit(R5_Discard, &sh->dev[sh->pd_idx].flags);
2786 clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags);
2787 if (sh->qd_idx >= 0) {
2788 clear_bit(R5_Discard, &sh->dev[sh->qd_idx].flags);
2789 clear_bit(R5_UPTODATE, &sh->dev[sh->qd_idx].flags);
2790 }
2791 /* now that discard is done we can proceed with any sync */
2792 clear_bit(STRIPE_DISCARD, &sh->state);
2793 if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state))
2794 set_bit(STRIPE_HANDLE, &sh->state);
2795
2796 }
2771 2797
2772 if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state)) 2798 if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state))
2773 if (atomic_dec_and_test(&conf->pending_full_writes)) 2799 if (atomic_dec_and_test(&conf->pending_full_writes))
@@ -2826,8 +2852,10 @@ static void handle_stripe_dirtying(struct r5conf *conf,
2826 set_bit(STRIPE_HANDLE, &sh->state); 2852 set_bit(STRIPE_HANDLE, &sh->state);
2827 if (rmw < rcw && rmw > 0) { 2853 if (rmw < rcw && rmw > 0) {
2828 /* prefer read-modify-write, but need to get some data */ 2854 /* prefer read-modify-write, but need to get some data */
2829 blk_add_trace_msg(conf->mddev->queue, "raid5 rmw %llu %d", 2855 if (conf->mddev->queue)
2830 (unsigned long long)sh->sector, rmw); 2856 blk_add_trace_msg(conf->mddev->queue,
2857 "raid5 rmw %llu %d",
2858 (unsigned long long)sh->sector, rmw);
2831 for (i = disks; i--; ) { 2859 for (i = disks; i--; ) {
2832 struct r5dev *dev = &sh->dev[i]; 2860 struct r5dev *dev = &sh->dev[i];
2833 if ((dev->towrite || i == sh->pd_idx) && 2861 if ((dev->towrite || i == sh->pd_idx) &&
@@ -2877,7 +2905,7 @@ static void handle_stripe_dirtying(struct r5conf *conf,
2877 } 2905 }
2878 } 2906 }
2879 } 2907 }
2880 if (rcw) 2908 if (rcw && conf->mddev->queue)
2881 blk_add_trace_msg(conf->mddev->queue, "raid5 rcw %llu %d %d %d", 2909 blk_add_trace_msg(conf->mddev->queue, "raid5 rcw %llu %d %d %d",
2882 (unsigned long long)sh->sector, 2910 (unsigned long long)sh->sector,
2883 rcw, qread, test_bit(STRIPE_DELAYED, &sh->state)); 2911 rcw, qread, test_bit(STRIPE_DELAYED, &sh->state));
@@ -3417,9 +3445,15 @@ static void handle_stripe(struct stripe_head *sh)
3417 return; 3445 return;
3418 } 3446 }
3419 3447
3420 if (test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) { 3448 if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
3421 set_bit(STRIPE_SYNCING, &sh->state); 3449 spin_lock(&sh->stripe_lock);
3422 clear_bit(STRIPE_INSYNC, &sh->state); 3450 /* Cannot process 'sync' concurrently with 'discard' */
3451 if (!test_bit(STRIPE_DISCARD, &sh->state) &&
3452 test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
3453 set_bit(STRIPE_SYNCING, &sh->state);
3454 clear_bit(STRIPE_INSYNC, &sh->state);
3455 }
3456 spin_unlock(&sh->stripe_lock);
3423 } 3457 }
3424 clear_bit(STRIPE_DELAYED, &sh->state); 3458 clear_bit(STRIPE_DELAYED, &sh->state);
3425 3459
@@ -3579,6 +3613,8 @@ static void handle_stripe(struct stripe_head *sh)
3579 test_bit(STRIPE_INSYNC, &sh->state)) { 3613 test_bit(STRIPE_INSYNC, &sh->state)) {
3580 md_done_sync(conf->mddev, STRIPE_SECTORS, 1); 3614 md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
3581 clear_bit(STRIPE_SYNCING, &sh->state); 3615 clear_bit(STRIPE_SYNCING, &sh->state);
3616 if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags))
3617 wake_up(&conf->wait_for_overlap);
3582 } 3618 }
3583 3619
3584 /* If the failed drives are just a ReadError, then we might need 3620 /* If the failed drives are just a ReadError, then we might need
@@ -3982,9 +4018,10 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
3982 atomic_inc(&conf->active_aligned_reads); 4018 atomic_inc(&conf->active_aligned_reads);
3983 spin_unlock_irq(&conf->device_lock); 4019 spin_unlock_irq(&conf->device_lock);
3984 4020
3985 trace_block_bio_remap(bdev_get_queue(align_bi->bi_bdev), 4021 if (mddev->gendisk)
3986 align_bi, disk_devt(mddev->gendisk), 4022 trace_block_bio_remap(bdev_get_queue(align_bi->bi_bdev),
3987 raid_bio->bi_sector); 4023 align_bi, disk_devt(mddev->gendisk),
4024 raid_bio->bi_sector);
3988 generic_make_request(align_bi); 4025 generic_make_request(align_bi);
3989 return 1; 4026 return 1;
3990 } else { 4027 } else {
@@ -4078,7 +4115,8 @@ static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule)
4078 } 4115 }
4079 spin_unlock_irq(&conf->device_lock); 4116 spin_unlock_irq(&conf->device_lock);
4080 } 4117 }
4081 trace_block_unplug(mddev->queue, cnt, !from_schedule); 4118 if (mddev->queue)
4119 trace_block_unplug(mddev->queue, cnt, !from_schedule);
4082 kfree(cb); 4120 kfree(cb);
4083} 4121}
4084 4122
@@ -4141,6 +4179,13 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
4141 sh = get_active_stripe(conf, logical_sector, 0, 0, 0); 4179 sh = get_active_stripe(conf, logical_sector, 0, 0, 0);
4142 prepare_to_wait(&conf->wait_for_overlap, &w, 4180 prepare_to_wait(&conf->wait_for_overlap, &w,
4143 TASK_UNINTERRUPTIBLE); 4181 TASK_UNINTERRUPTIBLE);
4182 set_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags);
4183 if (test_bit(STRIPE_SYNCING, &sh->state)) {
4184 release_stripe(sh);
4185 schedule();
4186 goto again;
4187 }
4188 clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags);
4144 spin_lock_irq(&sh->stripe_lock); 4189 spin_lock_irq(&sh->stripe_lock);
4145 for (d = 0; d < conf->raid_disks; d++) { 4190 for (d = 0; d < conf->raid_disks; d++) {
4146 if (d == sh->pd_idx || d == sh->qd_idx) 4191 if (d == sh->pd_idx || d == sh->qd_idx)
@@ -4153,6 +4198,7 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
4153 goto again; 4198 goto again;
4154 } 4199 }
4155 } 4200 }
4201 set_bit(STRIPE_DISCARD, &sh->state);
4156 finish_wait(&conf->wait_for_overlap, &w); 4202 finish_wait(&conf->wait_for_overlap, &w);
4157 for (d = 0; d < conf->raid_disks; d++) { 4203 for (d = 0; d < conf->raid_disks; d++) {
4158 if (d == sh->pd_idx || d == sh->qd_idx) 4204 if (d == sh->pd_idx || d == sh->qd_idx)
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 18b2c4a8a1fd..b0b663b119a8 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -221,10 +221,6 @@ struct stripe_head {
221 struct stripe_operations { 221 struct stripe_operations {
222 int target, target2; 222 int target, target2;
223 enum sum_check_flags zero_sum_result; 223 enum sum_check_flags zero_sum_result;
224 #ifdef CONFIG_MULTICORE_RAID456
225 unsigned long request;
226 wait_queue_head_t wait_for_ops;
227 #endif
228 } ops; 224 } ops;
229 struct r5dev { 225 struct r5dev {
230 /* rreq and rvec are used for the replacement device when 226 /* rreq and rvec are used for the replacement device when
@@ -323,6 +319,7 @@ enum {
323 STRIPE_COMPUTE_RUN, 319 STRIPE_COMPUTE_RUN,
324 STRIPE_OPS_REQ_PENDING, 320 STRIPE_OPS_REQ_PENDING,
325 STRIPE_ON_UNPLUG_LIST, 321 STRIPE_ON_UNPLUG_LIST,
322 STRIPE_DISCARD,
326}; 323};
327 324
328/* 325/*