diff options
author | Ingo Molnar <mingo@elte.hu> | 2008-07-03 09:03:02 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-07-03 09:03:02 -0400 |
commit | a8cac817764a494705aebd99fd51bdf6cdc28ec9 (patch) | |
tree | 202847337f42b188755a53a7b57b502a7cdc0ad3 /drivers/md/raid5.c | |
parent | b4b3bd96f26586e53ab5482f1869221dd1b5ac36 (diff) | |
parent | 543cf4cb3fe6f6cae3651ba918b9c56200b257d0 (diff) |
Merge commit 'v2.6.26-rc8' into x86/mce
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r-- | drivers/md/raid5.c | 80 |
1 files changed, 54 insertions, 26 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 087eee0cb809..c37e256b1176 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c | |||
@@ -94,6 +94,8 @@ | |||
94 | #define __inline__ | 94 | #define __inline__ |
95 | #endif | 95 | #endif |
96 | 96 | ||
97 | #define printk_rl(args...) ((void) (printk_ratelimit() && printk(args))) | ||
98 | |||
97 | #if !RAID6_USE_EMPTY_ZERO_PAGE | 99 | #if !RAID6_USE_EMPTY_ZERO_PAGE |
98 | /* In .bss so it's zeroed */ | 100 | /* In .bss so it's zeroed */ |
99 | const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); | 101 | const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); |
@@ -1143,10 +1145,12 @@ static void raid5_end_read_request(struct bio * bi, int error) | |||
1143 | set_bit(R5_UPTODATE, &sh->dev[i].flags); | 1145 | set_bit(R5_UPTODATE, &sh->dev[i].flags); |
1144 | if (test_bit(R5_ReadError, &sh->dev[i].flags)) { | 1146 | if (test_bit(R5_ReadError, &sh->dev[i].flags)) { |
1145 | rdev = conf->disks[i].rdev; | 1147 | rdev = conf->disks[i].rdev; |
1146 | printk(KERN_INFO "raid5:%s: read error corrected (%lu sectors at %llu on %s)\n", | 1148 | printk_rl(KERN_INFO "raid5:%s: read error corrected" |
1147 | mdname(conf->mddev), STRIPE_SECTORS, | 1149 | " (%lu sectors at %llu on %s)\n", |
1148 | (unsigned long long)(sh->sector + rdev->data_offset), | 1150 | mdname(conf->mddev), STRIPE_SECTORS, |
1149 | bdevname(rdev->bdev, b)); | 1151 | (unsigned long long)(sh->sector |
1152 | + rdev->data_offset), | ||
1153 | bdevname(rdev->bdev, b)); | ||
1150 | clear_bit(R5_ReadError, &sh->dev[i].flags); | 1154 | clear_bit(R5_ReadError, &sh->dev[i].flags); |
1151 | clear_bit(R5_ReWrite, &sh->dev[i].flags); | 1155 | clear_bit(R5_ReWrite, &sh->dev[i].flags); |
1152 | } | 1156 | } |
@@ -1160,16 +1164,22 @@ static void raid5_end_read_request(struct bio * bi, int error) | |||
1160 | clear_bit(R5_UPTODATE, &sh->dev[i].flags); | 1164 | clear_bit(R5_UPTODATE, &sh->dev[i].flags); |
1161 | atomic_inc(&rdev->read_errors); | 1165 | atomic_inc(&rdev->read_errors); |
1162 | if (conf->mddev->degraded) | 1166 | if (conf->mddev->degraded) |
1163 | printk(KERN_WARNING "raid5:%s: read error not correctable (sector %llu on %s).\n", | 1167 | printk_rl(KERN_WARNING |
1164 | mdname(conf->mddev), | 1168 | "raid5:%s: read error not correctable " |
1165 | (unsigned long long)(sh->sector + rdev->data_offset), | 1169 | "(sector %llu on %s).\n", |
1166 | bdn); | 1170 | mdname(conf->mddev), |
1171 | (unsigned long long)(sh->sector | ||
1172 | + rdev->data_offset), | ||
1173 | bdn); | ||
1167 | else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) | 1174 | else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) |
1168 | /* Oh, no!!! */ | 1175 | /* Oh, no!!! */ |
1169 | printk(KERN_WARNING "raid5:%s: read error NOT corrected!! (sector %llu on %s).\n", | 1176 | printk_rl(KERN_WARNING |
1170 | mdname(conf->mddev), | 1177 | "raid5:%s: read error NOT corrected!! " |
1171 | (unsigned long long)(sh->sector + rdev->data_offset), | 1178 | "(sector %llu on %s).\n", |
1172 | bdn); | 1179 | mdname(conf->mddev), |
1180 | (unsigned long long)(sh->sector | ||
1181 | + rdev->data_offset), | ||
1182 | bdn); | ||
1173 | else if (atomic_read(&rdev->read_errors) | 1183 | else if (atomic_read(&rdev->read_errors) |
1174 | > conf->max_nr_stripes) | 1184 | > conf->max_nr_stripes) |
1175 | printk(KERN_WARNING | 1185 | printk(KERN_WARNING |
@@ -1258,7 +1268,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1258 | /* | 1268 | /* |
1259 | * if recovery was running, make sure it aborts. | 1269 | * if recovery was running, make sure it aborts. |
1260 | */ | 1270 | */ |
1261 | set_bit(MD_RECOVERY_ERR, &mddev->recovery); | 1271 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); |
1262 | } | 1272 | } |
1263 | set_bit(Faulty, &rdev->flags); | 1273 | set_bit(Faulty, &rdev->flags); |
1264 | printk (KERN_ALERT | 1274 | printk (KERN_ALERT |
@@ -1992,6 +2002,7 @@ static int __handle_issuing_new_read_requests5(struct stripe_head *sh, | |||
1992 | * have quiesced. | 2002 | * have quiesced. |
1993 | */ | 2003 | */ |
1994 | if ((s->uptodate == disks - 1) && | 2004 | if ((s->uptodate == disks - 1) && |
2005 | (s->failed && disk_idx == s->failed_num) && | ||
1995 | !test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) { | 2006 | !test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) { |
1996 | set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending); | 2007 | set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending); |
1997 | set_bit(R5_Wantcompute, &dev->flags); | 2008 | set_bit(R5_Wantcompute, &dev->flags); |
@@ -2077,7 +2088,9 @@ static void handle_issuing_new_read_requests6(struct stripe_head *sh, | |||
2077 | /* we would like to get this block, possibly | 2088 | /* we would like to get this block, possibly |
2078 | * by computing it, but we might not be able to | 2089 | * by computing it, but we might not be able to |
2079 | */ | 2090 | */ |
2080 | if (s->uptodate == disks-1) { | 2091 | if ((s->uptodate == disks - 1) && |
2092 | (s->failed && (i == r6s->failed_num[0] || | ||
2093 | i == r6s->failed_num[1]))) { | ||
2081 | pr_debug("Computing stripe %llu block %d\n", | 2094 | pr_debug("Computing stripe %llu block %d\n", |
2082 | (unsigned long long)sh->sector, i); | 2095 | (unsigned long long)sh->sector, i); |
2083 | compute_block_1(sh, i, 0); | 2096 | compute_block_1(sh, i, 0); |
@@ -2369,8 +2382,8 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, | |||
2369 | 2382 | ||
2370 | /* complete a check operation */ | 2383 | /* complete a check operation */ |
2371 | if (test_and_clear_bit(STRIPE_OP_CHECK, &sh->ops.complete)) { | 2384 | if (test_and_clear_bit(STRIPE_OP_CHECK, &sh->ops.complete)) { |
2372 | clear_bit(STRIPE_OP_CHECK, &sh->ops.ack); | 2385 | clear_bit(STRIPE_OP_CHECK, &sh->ops.ack); |
2373 | clear_bit(STRIPE_OP_CHECK, &sh->ops.pending); | 2386 | clear_bit(STRIPE_OP_CHECK, &sh->ops.pending); |
2374 | if (s->failed == 0) { | 2387 | if (s->failed == 0) { |
2375 | if (sh->ops.zero_sum_result == 0) | 2388 | if (sh->ops.zero_sum_result == 0) |
2376 | /* parity is correct (on disc, | 2389 | /* parity is correct (on disc, |
@@ -2400,16 +2413,6 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, | |||
2400 | canceled_check = 1; /* STRIPE_INSYNC is not set */ | 2413 | canceled_check = 1; /* STRIPE_INSYNC is not set */ |
2401 | } | 2414 | } |
2402 | 2415 | ||
2403 | /* check if we can clear a parity disk reconstruct */ | ||
2404 | if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) && | ||
2405 | test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) { | ||
2406 | |||
2407 | clear_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending); | ||
2408 | clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete); | ||
2409 | clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack); | ||
2410 | clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending); | ||
2411 | } | ||
2412 | |||
2413 | /* start a new check operation if there are no failures, the stripe is | 2416 | /* start a new check operation if there are no failures, the stripe is |
2414 | * not insync, and a repair is not in flight | 2417 | * not insync, and a repair is not in flight |
2415 | */ | 2418 | */ |
@@ -2424,6 +2427,17 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, | |||
2424 | } | 2427 | } |
2425 | } | 2428 | } |
2426 | 2429 | ||
2430 | /* check if we can clear a parity disk reconstruct */ | ||
2431 | if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) && | ||
2432 | test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) { | ||
2433 | |||
2434 | clear_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending); | ||
2435 | clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete); | ||
2436 | clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack); | ||
2437 | clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending); | ||
2438 | } | ||
2439 | |||
2440 | |||
2427 | /* Wait for check parity and compute block operations to complete | 2441 | /* Wait for check parity and compute block operations to complete |
2428 | * before write-back. If a failure occurred while the check operation | 2442 | * before write-back. If a failure occurred while the check operation |
2429 | * was in flight we need to cycle this stripe through handle_stripe | 2443 | * was in flight we need to cycle this stripe through handle_stripe |
@@ -2634,6 +2648,7 @@ static void handle_stripe5(struct stripe_head *sh) | |||
2634 | struct r5dev *dev; | 2648 | struct r5dev *dev; |
2635 | unsigned long pending = 0; | 2649 | unsigned long pending = 0; |
2636 | mdk_rdev_t *blocked_rdev = NULL; | 2650 | mdk_rdev_t *blocked_rdev = NULL; |
2651 | int prexor; | ||
2637 | 2652 | ||
2638 | memset(&s, 0, sizeof(s)); | 2653 | memset(&s, 0, sizeof(s)); |
2639 | pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d " | 2654 | pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d " |
@@ -2763,9 +2778,11 @@ static void handle_stripe5(struct stripe_head *sh) | |||
2763 | /* leave prexor set until postxor is done, allows us to distinguish | 2778 | /* leave prexor set until postxor is done, allows us to distinguish |
2764 | * a rmw from a rcw during biodrain | 2779 | * a rmw from a rcw during biodrain |
2765 | */ | 2780 | */ |
2781 | prexor = 0; | ||
2766 | if (test_bit(STRIPE_OP_PREXOR, &sh->ops.complete) && | 2782 | if (test_bit(STRIPE_OP_PREXOR, &sh->ops.complete) && |
2767 | test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) { | 2783 | test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) { |
2768 | 2784 | ||
2785 | prexor = 1; | ||
2769 | clear_bit(STRIPE_OP_PREXOR, &sh->ops.complete); | 2786 | clear_bit(STRIPE_OP_PREXOR, &sh->ops.complete); |
2770 | clear_bit(STRIPE_OP_PREXOR, &sh->ops.ack); | 2787 | clear_bit(STRIPE_OP_PREXOR, &sh->ops.ack); |
2771 | clear_bit(STRIPE_OP_PREXOR, &sh->ops.pending); | 2788 | clear_bit(STRIPE_OP_PREXOR, &sh->ops.pending); |
@@ -2799,6 +2816,8 @@ static void handle_stripe5(struct stripe_head *sh) | |||
2799 | if (!test_and_set_bit( | 2816 | if (!test_and_set_bit( |
2800 | STRIPE_OP_IO, &sh->ops.pending)) | 2817 | STRIPE_OP_IO, &sh->ops.pending)) |
2801 | sh->ops.count++; | 2818 | sh->ops.count++; |
2819 | if (prexor) | ||
2820 | continue; | ||
2802 | if (!test_bit(R5_Insync, &dev->flags) || | 2821 | if (!test_bit(R5_Insync, &dev->flags) || |
2803 | (i == sh->pd_idx && s.failed == 0)) | 2822 | (i == sh->pd_idx && s.failed == 0)) |
2804 | set_bit(STRIPE_INSYNC, &sh->state); | 2823 | set_bit(STRIPE_INSYNC, &sh->state); |
@@ -4256,6 +4275,7 @@ static int run(mddev_t *mddev) | |||
4256 | goto abort; | 4275 | goto abort; |
4257 | } | 4276 | } |
4258 | spin_lock_init(&conf->device_lock); | 4277 | spin_lock_init(&conf->device_lock); |
4278 | mddev->queue->queue_lock = &conf->device_lock; | ||
4259 | init_waitqueue_head(&conf->wait_for_stripe); | 4279 | init_waitqueue_head(&conf->wait_for_stripe); |
4260 | init_waitqueue_head(&conf->wait_for_overlap); | 4280 | init_waitqueue_head(&conf->wait_for_overlap); |
4261 | INIT_LIST_HEAD(&conf->handle_list); | 4281 | INIT_LIST_HEAD(&conf->handle_list); |
@@ -4562,6 +4582,14 @@ static int raid5_remove_disk(mddev_t *mddev, int number) | |||
4562 | err = -EBUSY; | 4582 | err = -EBUSY; |
4563 | goto abort; | 4583 | goto abort; |
4564 | } | 4584 | } |
4585 | /* Only remove non-faulty devices if recovery | ||
4586 | * isn't possible. | ||
4587 | */ | ||
4588 | if (!test_bit(Faulty, &rdev->flags) && | ||
4589 | mddev->degraded <= conf->max_degraded) { | ||
4590 | err = -EBUSY; | ||
4591 | goto abort; | ||
4592 | } | ||
4565 | p->rdev = NULL; | 4593 | p->rdev = NULL; |
4566 | synchronize_rcu(); | 4594 | synchronize_rcu(); |
4567 | if (atomic_read(&rdev->nr_pending)) { | 4595 | if (atomic_read(&rdev->nr_pending)) { |