diff options
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r-- | drivers/md/raid5.c | 94 |
1 files changed, 61 insertions, 33 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 087eee0cb809..3b27df52456b 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -94,6 +94,8 @@ | |||
94 | #define __inline__ | 94 | #define __inline__ |
95 | #endif | 95 | #endif |
96 | 96 | ||
97 | #define printk_rl(args...) ((void) (printk_ratelimit() && printk(args))) | ||
98 | |||
97 | #if !RAID6_USE_EMPTY_ZERO_PAGE | 99 | #if !RAID6_USE_EMPTY_ZERO_PAGE |
98 | /* In .bss so it's zeroed */ | 100 | /* In .bss so it's zeroed */ |
99 | const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); | 101 | const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); |
@@ -1143,10 +1145,12 @@ static void raid5_end_read_request(struct bio * bi, int error) | |||
1143 | set_bit(R5_UPTODATE, &sh->dev[i].flags); | 1145 | set_bit(R5_UPTODATE, &sh->dev[i].flags); |
1144 | if (test_bit(R5_ReadError, &sh->dev[i].flags)) { | 1146 | if (test_bit(R5_ReadError, &sh->dev[i].flags)) { |
1145 | rdev = conf->disks[i].rdev; | 1147 | rdev = conf->disks[i].rdev; |
1146 | printk(KERN_INFO "raid5:%s: read error corrected (%lu sectors at %llu on %s)\n", | 1148 | printk_rl(KERN_INFO "raid5:%s: read error corrected" |
1147 | mdname(conf->mddev), STRIPE_SECTORS, | 1149 | " (%lu sectors at %llu on %s)\n", |
1148 | (unsigned long long)(sh->sector + rdev->data_offset), | 1150 | mdname(conf->mddev), STRIPE_SECTORS, |
1149 | bdevname(rdev->bdev, b)); | 1151 | (unsigned long long)(sh->sector |
1152 | + rdev->data_offset), | ||
1153 | bdevname(rdev->bdev, b)); | ||
1150 | clear_bit(R5_ReadError, &sh->dev[i].flags); | 1154 | clear_bit(R5_ReadError, &sh->dev[i].flags); |
1151 | clear_bit(R5_ReWrite, &sh->dev[i].flags); | 1155 | clear_bit(R5_ReWrite, &sh->dev[i].flags); |
1152 | } | 1156 | } |
@@ -1160,16 +1164,22 @@ static void raid5_end_read_request(struct bio * bi, int error) | |||
1160 | clear_bit(R5_UPTODATE, &sh->dev[i].flags); | 1164 | clear_bit(R5_UPTODATE, &sh->dev[i].flags); |
1161 | atomic_inc(&rdev->read_errors); | 1165 | atomic_inc(&rdev->read_errors); |
1162 | if (conf->mddev->degraded) | 1166 | if (conf->mddev->degraded) |
1163 | printk(KERN_WARNING "raid5:%s: read error not correctable (sector %llu on %s).\n", | 1167 | printk_rl(KERN_WARNING |
1164 | mdname(conf->mddev), | 1168 | "raid5:%s: read error not correctable " |
1165 | (unsigned long long)(sh->sector + rdev->data_offset), | 1169 | "(sector %llu on %s).\n", |
1166 | bdn); | 1170 | mdname(conf->mddev), |
1171 | (unsigned long long)(sh->sector | ||
1172 | + rdev->data_offset), | ||
1173 | bdn); | ||
1167 | else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) | 1174 | else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) |
1168 | /* Oh, no!!! */ | 1175 | /* Oh, no!!! */ |
1169 | printk(KERN_WARNING "raid5:%s: read error NOT corrected!! (sector %llu on %s).\n", | 1176 | printk_rl(KERN_WARNING |
1170 | mdname(conf->mddev), | 1177 | "raid5:%s: read error NOT corrected!! " |
1171 | (unsigned long long)(sh->sector + rdev->data_offset), | 1178 | "(sector %llu on %s).\n", |
1172 | bdn); | 1179 | mdname(conf->mddev), |
1180 | (unsigned long long)(sh->sector | ||
1181 | + rdev->data_offset), | ||
1182 | bdn); | ||
1173 | else if (atomic_read(&rdev->read_errors) | 1183 | else if (atomic_read(&rdev->read_errors) |
1174 | > conf->max_nr_stripes) | 1184 | > conf->max_nr_stripes) |
1175 | printk(KERN_WARNING | 1185 | printk(KERN_WARNING |
@@ -1258,7 +1268,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1258 | /* | 1268 | /* |
1259 | * if recovery was running, make sure it aborts. | 1269 | * if recovery was running, make sure it aborts. |
1260 | */ | 1270 | */ |
1261 | set_bit(MD_RECOVERY_ERR, &mddev->recovery); | 1271 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); |
1262 | } | 1272 | } |
1263 | set_bit(Faulty, &rdev->flags); | 1273 | set_bit(Faulty, &rdev->flags); |
1264 | printk (KERN_ALERT | 1274 | printk (KERN_ALERT |
@@ -1992,6 +2002,7 @@ static int __handle_issuing_new_read_requests5(struct stripe_head *sh, | |||
1992 | * have quiesced. | 2002 | * have quiesced. |
1993 | */ | 2003 | */ |
1994 | if ((s->uptodate == disks - 1) && | 2004 | if ((s->uptodate == disks - 1) && |
2005 | (s->failed && disk_idx == s->failed_num) && | ||
1995 | !test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) { | 2006 | !test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) { |
1996 | set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending); | 2007 | set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending); |
1997 | set_bit(R5_Wantcompute, &dev->flags); | 2008 | set_bit(R5_Wantcompute, &dev->flags); |
@@ -2006,12 +2017,7 @@ static int __handle_issuing_new_read_requests5(struct stripe_head *sh, | |||
2006 | */ | 2017 | */ |
2007 | s->uptodate++; | 2018 | s->uptodate++; |
2008 | return 0; /* uptodate + compute == disks */ | 2019 | return 0; /* uptodate + compute == disks */ |
2009 | } else if ((s->uptodate < disks - 1) && | 2020 | } else if (test_bit(R5_Insync, &dev->flags)) { |
2010 | test_bit(R5_Insync, &dev->flags)) { | ||
2011 | /* Note: we hold off compute operations while checks are | ||
2012 | * in flight, but we still prefer 'compute' over 'read' | ||
2013 | * hence we only read if (uptodate < * disks-1) | ||
2014 | */ | ||
2015 | set_bit(R5_LOCKED, &dev->flags); | 2021 | set_bit(R5_LOCKED, &dev->flags); |
2016 | set_bit(R5_Wantread, &dev->flags); | 2022 | set_bit(R5_Wantread, &dev->flags); |
2017 | if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending)) | 2023 | if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending)) |
@@ -2077,7 +2083,9 @@ static void handle_issuing_new_read_requests6(struct stripe_head *sh, | |||
2077 | /* we would like to get this block, possibly | 2083 | /* we would like to get this block, possibly |
2078 | * by computing it, but we might not be able to | 2084 | * by computing it, but we might not be able to |
2079 | */ | 2085 | */ |
2080 | if (s->uptodate == disks-1) { | 2086 | if ((s->uptodate == disks - 1) && |
2087 | (s->failed && (i == r6s->failed_num[0] || | ||
2088 | i == r6s->failed_num[1]))) { | ||
2081 | pr_debug("Computing stripe %llu block %d\n", | 2089 | pr_debug("Computing stripe %llu block %d\n", |
2082 | (unsigned long long)sh->sector, i); | 2090 | (unsigned long long)sh->sector, i); |
2083 | compute_block_1(sh, i, 0); | 2091 | compute_block_1(sh, i, 0); |
@@ -2369,8 +2377,8 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, | |||
2369 | 2377 | ||
2370 | /* complete a check operation */ | 2378 | /* complete a check operation */ |
2371 | if (test_and_clear_bit(STRIPE_OP_CHECK, &sh->ops.complete)) { | 2379 | if (test_and_clear_bit(STRIPE_OP_CHECK, &sh->ops.complete)) { |
2372 | clear_bit(STRIPE_OP_CHECK, &sh->ops.ack); | 2380 | clear_bit(STRIPE_OP_CHECK, &sh->ops.ack); |
2373 | clear_bit(STRIPE_OP_CHECK, &sh->ops.pending); | 2381 | clear_bit(STRIPE_OP_CHECK, &sh->ops.pending); |
2374 | if (s->failed == 0) { | 2382 | if (s->failed == 0) { |
2375 | if (sh->ops.zero_sum_result == 0) | 2383 | if (sh->ops.zero_sum_result == 0) |
2376 | /* parity is correct (on disc, | 2384 | /* parity is correct (on disc, |
@@ -2400,16 +2408,6 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, | |||
2400 | canceled_check = 1; /* STRIPE_INSYNC is not set */ | 2408 | canceled_check = 1; /* STRIPE_INSYNC is not set */ |
2401 | } | 2409 | } |
2402 | 2410 | ||
2403 | /* check if we can clear a parity disk reconstruct */ | ||
2404 | if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) && | ||
2405 | test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) { | ||
2406 | |||
2407 | clear_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending); | ||
2408 | clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete); | ||
2409 | clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack); | ||
2410 | clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending); | ||
2411 | } | ||
2412 | |||
2413 | /* start a new check operation if there are no failures, the stripe is | 2411 | /* start a new check operation if there are no failures, the stripe is |
2414 | * not insync, and a repair is not in flight | 2412 | * not insync, and a repair is not in flight |
2415 | */ | 2413 | */ |
@@ -2424,6 +2422,17 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, | |||
2424 | } | 2422 | } |
2425 | } | 2423 | } |
2426 | 2424 | ||
2425 | /* check if we can clear a parity disk reconstruct */ | ||
2426 | if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) && | ||
2427 | test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) { | ||
2428 | |||
2429 | clear_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending); | ||
2430 | clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete); | ||
2431 | clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack); | ||
2432 | clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending); | ||
2433 | } | ||
2434 | |||
2435 | |||
2427 | /* Wait for check parity and compute block operations to complete | 2436 | /* Wait for check parity and compute block operations to complete |
2428 | * before write-back. If a failure occurred while the check operation | 2437 | * before write-back. If a failure occurred while the check operation |
2429 | * was in flight we need to cycle this stripe through handle_stripe | 2438 | * was in flight we need to cycle this stripe through handle_stripe |
@@ -2634,6 +2643,7 @@ static void handle_stripe5(struct stripe_head *sh) | |||
2634 | struct r5dev *dev; | 2643 | struct r5dev *dev; |
2635 | unsigned long pending = 0; | 2644 | unsigned long pending = 0; |
2636 | mdk_rdev_t *blocked_rdev = NULL; | 2645 | mdk_rdev_t *blocked_rdev = NULL; |
2646 | int prexor; | ||
2637 | 2647 | ||
2638 | memset(&s, 0, sizeof(s)); | 2648 | memset(&s, 0, sizeof(s)); |
2639 | pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d " | 2649 | pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d " |
@@ -2763,9 +2773,11 @@ static void handle_stripe5(struct stripe_head *sh) | |||
2763 | /* leave prexor set until postxor is done, allows us to distinguish | 2773 | /* leave prexor set until postxor is done, allows us to distinguish |
2764 | * a rmw from a rcw during biodrain | 2774 | * a rmw from a rcw during biodrain |
2765 | */ | 2775 | */ |
2776 | prexor = 0; | ||
2766 | if (test_bit(STRIPE_OP_PREXOR, &sh->ops.complete) && | 2777 | if (test_bit(STRIPE_OP_PREXOR, &sh->ops.complete) && |
2767 | test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) { | 2778 | test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) { |
2768 | 2779 | ||
2780 | prexor = 1; | ||
2769 | clear_bit(STRIPE_OP_PREXOR, &sh->ops.complete); | 2781 | clear_bit(STRIPE_OP_PREXOR, &sh->ops.complete); |
2770 | clear_bit(STRIPE_OP_PREXOR, &sh->ops.ack); | 2782 | clear_bit(STRIPE_OP_PREXOR, &sh->ops.ack); |
2771 | clear_bit(STRIPE_OP_PREXOR, &sh->ops.pending); | 2783 | clear_bit(STRIPE_OP_PREXOR, &sh->ops.pending); |
@@ -2799,6 +2811,8 @@ static void handle_stripe5(struct stripe_head *sh) | |||
2799 | if (!test_and_set_bit( | 2811 | if (!test_and_set_bit( |
2800 | STRIPE_OP_IO, &sh->ops.pending)) | 2812 | STRIPE_OP_IO, &sh->ops.pending)) |
2801 | sh->ops.count++; | 2813 | sh->ops.count++; |
2814 | if (prexor) | ||
2815 | continue; | ||
2802 | if (!test_bit(R5_Insync, &dev->flags) || | 2816 | if (!test_bit(R5_Insync, &dev->flags) || |
2803 | (i == sh->pd_idx && s.failed == 0)) | 2817 | (i == sh->pd_idx && s.failed == 0)) |
2804 | set_bit(STRIPE_INSYNC, &sh->state); | 2818 | set_bit(STRIPE_INSYNC, &sh->state); |
@@ -2879,6 +2893,8 @@ static void handle_stripe5(struct stripe_head *sh) | |||
2879 | 2893 | ||
2880 | for (i = conf->raid_disks; i--; ) { | 2894 | for (i = conf->raid_disks; i--; ) { |
2881 | set_bit(R5_Wantwrite, &sh->dev[i].flags); | 2895 | set_bit(R5_Wantwrite, &sh->dev[i].flags); |
2896 | set_bit(R5_LOCKED, &dev->flags); | ||
2897 | s.locked++; | ||
2882 | if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending)) | 2898 | if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending)) |
2883 | sh->ops.count++; | 2899 | sh->ops.count++; |
2884 | } | 2900 | } |
@@ -2892,6 +2908,7 @@ static void handle_stripe5(struct stripe_head *sh) | |||
2892 | conf->raid_disks); | 2908 | conf->raid_disks); |
2893 | s.locked += handle_write_operations5(sh, 1, 1); | 2909 | s.locked += handle_write_operations5(sh, 1, 1); |
2894 | } else if (s.expanded && | 2910 | } else if (s.expanded && |
2911 | s.locked == 0 && | ||
2895 | !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) { | 2912 | !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) { |
2896 | clear_bit(STRIPE_EXPAND_READY, &sh->state); | 2913 | clear_bit(STRIPE_EXPAND_READY, &sh->state); |
2897 | atomic_dec(&conf->reshape_stripes); | 2914 | atomic_dec(&conf->reshape_stripes); |
@@ -4256,6 +4273,7 @@ static int run(mddev_t *mddev) | |||
4256 | goto abort; | 4273 | goto abort; |
4257 | } | 4274 | } |
4258 | spin_lock_init(&conf->device_lock); | 4275 | spin_lock_init(&conf->device_lock); |
4276 | mddev->queue->queue_lock = &conf->device_lock; | ||
4259 | init_waitqueue_head(&conf->wait_for_stripe); | 4277 | init_waitqueue_head(&conf->wait_for_stripe); |
4260 | init_waitqueue_head(&conf->wait_for_overlap); | 4278 | init_waitqueue_head(&conf->wait_for_overlap); |
4261 | INIT_LIST_HEAD(&conf->handle_list); | 4279 | INIT_LIST_HEAD(&conf->handle_list); |
@@ -4285,7 +4303,9 @@ static int run(mddev_t *mddev) | |||
4285 | " disk %d\n", bdevname(rdev->bdev,b), | 4303 | " disk %d\n", bdevname(rdev->bdev,b), |
4286 | raid_disk); | 4304 | raid_disk); |
4287 | working_disks++; | 4305 | working_disks++; |
4288 | } | 4306 | } else |
4307 | /* Cannot rely on bitmap to complete recovery */ | ||
4308 | conf->fullsync = 1; | ||
4289 | } | 4309 | } |
4290 | 4310 | ||
4291 | /* | 4311 | /* |
@@ -4562,6 +4582,14 @@ static int raid5_remove_disk(mddev_t *mddev, int number) | |||
4562 | err = -EBUSY; | 4582 | err = -EBUSY; |
4563 | goto abort; | 4583 | goto abort; |
4564 | } | 4584 | } |
4585 | /* Only remove non-faulty devices if recovery | ||
4586 | * isn't possible. | ||
4587 | */ | ||
4588 | if (!test_bit(Faulty, &rdev->flags) && | ||
4589 | mddev->degraded <= conf->max_degraded) { | ||
4590 | err = -EBUSY; | ||
4591 | goto abort; | ||
4592 | } | ||
4565 | p->rdev = NULL; | 4593 | p->rdev = NULL; |
4566 | synchronize_rcu(); | 4594 | synchronize_rcu(); |
4567 | if (atomic_read(&rdev->nr_pending)) { | 4595 | if (atomic_read(&rdev->nr_pending)) { |