aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/raid5.c
diff options
context:
space:
mode:
author Ingo Molnar <mingo@elte.hu> 2008-07-03 09:03:02 -0400
committer Ingo Molnar <mingo@elte.hu> 2008-07-03 09:03:02 -0400
commit a8cac817764a494705aebd99fd51bdf6cdc28ec9 (patch)
tree 202847337f42b188755a53a7b57b502a7cdc0ad3 /drivers/md/raid5.c
parent b4b3bd96f26586e53ab5482f1869221dd1b5ac36 (diff)
parent 543cf4cb3fe6f6cae3651ba918b9c56200b257d0 (diff)
Merge commit 'v2.6.26-rc8' into x86/mce
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r-- drivers/md/raid5.c 80
1 files changed, 54 insertions, 26 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 087eee0cb809..c37e256b1176 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -94,6 +94,8 @@
94#define __inline__ 94#define __inline__
95#endif 95#endif
96 96
97#define printk_rl(args...) ((void) (printk_ratelimit() && printk(args)))
98
97#if !RAID6_USE_EMPTY_ZERO_PAGE 99#if !RAID6_USE_EMPTY_ZERO_PAGE
98/* In .bss so it's zeroed */ 100/* In .bss so it's zeroed */
99const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); 101const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
@@ -1143,10 +1145,12 @@ static void raid5_end_read_request(struct bio * bi, int error)
1143 set_bit(R5_UPTODATE, &sh->dev[i].flags); 1145 set_bit(R5_UPTODATE, &sh->dev[i].flags);
1144 if (test_bit(R5_ReadError, &sh->dev[i].flags)) { 1146 if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
1145 rdev = conf->disks[i].rdev; 1147 rdev = conf->disks[i].rdev;
1146 printk(KERN_INFO "raid5:%s: read error corrected (%lu sectors at %llu on %s)\n", 1148 printk_rl(KERN_INFO "raid5:%s: read error corrected"
1147 mdname(conf->mddev), STRIPE_SECTORS, 1149 " (%lu sectors at %llu on %s)\n",
1148 (unsigned long long)(sh->sector + rdev->data_offset), 1150 mdname(conf->mddev), STRIPE_SECTORS,
1149 bdevname(rdev->bdev, b)); 1151 (unsigned long long)(sh->sector
1152 + rdev->data_offset),
1153 bdevname(rdev->bdev, b));
1150 clear_bit(R5_ReadError, &sh->dev[i].flags); 1154 clear_bit(R5_ReadError, &sh->dev[i].flags);
1151 clear_bit(R5_ReWrite, &sh->dev[i].flags); 1155 clear_bit(R5_ReWrite, &sh->dev[i].flags);
1152 } 1156 }
@@ -1160,16 +1164,22 @@ static void raid5_end_read_request(struct bio * bi, int error)
1160 clear_bit(R5_UPTODATE, &sh->dev[i].flags); 1164 clear_bit(R5_UPTODATE, &sh->dev[i].flags);
1161 atomic_inc(&rdev->read_errors); 1165 atomic_inc(&rdev->read_errors);
1162 if (conf->mddev->degraded) 1166 if (conf->mddev->degraded)
1163 printk(KERN_WARNING "raid5:%s: read error not correctable (sector %llu on %s).\n", 1167 printk_rl(KERN_WARNING
1164 mdname(conf->mddev), 1168 "raid5:%s: read error not correctable "
1165 (unsigned long long)(sh->sector + rdev->data_offset), 1169 "(sector %llu on %s).\n",
1166 bdn); 1170 mdname(conf->mddev),
1171 (unsigned long long)(sh->sector
1172 + rdev->data_offset),
1173 bdn);
1167 else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) 1174 else if (test_bit(R5_ReWrite, &sh->dev[i].flags))
1168 /* Oh, no!!! */ 1175 /* Oh, no!!! */
1169 printk(KERN_WARNING "raid5:%s: read error NOT corrected!! (sector %llu on %s).\n", 1176 printk_rl(KERN_WARNING
1170 mdname(conf->mddev), 1177 "raid5:%s: read error NOT corrected!! "
1171 (unsigned long long)(sh->sector + rdev->data_offset), 1178 "(sector %llu on %s).\n",
1172 bdn); 1179 mdname(conf->mddev),
1180 (unsigned long long)(sh->sector
1181 + rdev->data_offset),
1182 bdn);
1173 else if (atomic_read(&rdev->read_errors) 1183 else if (atomic_read(&rdev->read_errors)
1174 > conf->max_nr_stripes) 1184 > conf->max_nr_stripes)
1175 printk(KERN_WARNING 1185 printk(KERN_WARNING
@@ -1258,7 +1268,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
1258 /* 1268 /*
1259 * if recovery was running, make sure it aborts. 1269 * if recovery was running, make sure it aborts.
1260 */ 1270 */
1261 set_bit(MD_RECOVERY_ERR, &mddev->recovery); 1271 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
1262 } 1272 }
1263 set_bit(Faulty, &rdev->flags); 1273 set_bit(Faulty, &rdev->flags);
1264 printk (KERN_ALERT 1274 printk (KERN_ALERT
@@ -1992,6 +2002,7 @@ static int __handle_issuing_new_read_requests5(struct stripe_head *sh,
1992 * have quiesced. 2002 * have quiesced.
1993 */ 2003 */
1994 if ((s->uptodate == disks - 1) && 2004 if ((s->uptodate == disks - 1) &&
2005 (s->failed && disk_idx == s->failed_num) &&
1995 !test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) { 2006 !test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) {
1996 set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending); 2007 set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
1997 set_bit(R5_Wantcompute, &dev->flags); 2008 set_bit(R5_Wantcompute, &dev->flags);
@@ -2077,7 +2088,9 @@ static void handle_issuing_new_read_requests6(struct stripe_head *sh,
2077 /* we would like to get this block, possibly 2088 /* we would like to get this block, possibly
2078 * by computing it, but we might not be able to 2089 * by computing it, but we might not be able to
2079 */ 2090 */
2080 if (s->uptodate == disks-1) { 2091 if ((s->uptodate == disks - 1) &&
2092 (s->failed && (i == r6s->failed_num[0] ||
2093 i == r6s->failed_num[1]))) {
2081 pr_debug("Computing stripe %llu block %d\n", 2094 pr_debug("Computing stripe %llu block %d\n",
2082 (unsigned long long)sh->sector, i); 2095 (unsigned long long)sh->sector, i);
2083 compute_block_1(sh, i, 0); 2096 compute_block_1(sh, i, 0);
@@ -2369,8 +2382,8 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
2369 2382
2370 /* complete a check operation */ 2383 /* complete a check operation */
2371 if (test_and_clear_bit(STRIPE_OP_CHECK, &sh->ops.complete)) { 2384 if (test_and_clear_bit(STRIPE_OP_CHECK, &sh->ops.complete)) {
2372 clear_bit(STRIPE_OP_CHECK, &sh->ops.ack); 2385 clear_bit(STRIPE_OP_CHECK, &sh->ops.ack);
2373 clear_bit(STRIPE_OP_CHECK, &sh->ops.pending); 2386 clear_bit(STRIPE_OP_CHECK, &sh->ops.pending);
2374 if (s->failed == 0) { 2387 if (s->failed == 0) {
2375 if (sh->ops.zero_sum_result == 0) 2388 if (sh->ops.zero_sum_result == 0)
2376 /* parity is correct (on disc, 2389 /* parity is correct (on disc,
@@ -2400,16 +2413,6 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
2400 canceled_check = 1; /* STRIPE_INSYNC is not set */ 2413 canceled_check = 1; /* STRIPE_INSYNC is not set */
2401 } 2414 }
2402 2415
2403 /* check if we can clear a parity disk reconstruct */
2404 if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) &&
2405 test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
2406
2407 clear_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending);
2408 clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete);
2409 clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack);
2410 clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
2411 }
2412
2413 /* start a new check operation if there are no failures, the stripe is 2416 /* start a new check operation if there are no failures, the stripe is
2414 * not insync, and a repair is not in flight 2417 * not insync, and a repair is not in flight
2415 */ 2418 */
@@ -2424,6 +2427,17 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
2424 } 2427 }
2425 } 2428 }
2426 2429
2430 /* check if we can clear a parity disk reconstruct */
2431 if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) &&
2432 test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
2433
2434 clear_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending);
2435 clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete);
2436 clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack);
2437 clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
2438 }
2439
2440
2427 /* Wait for check parity and compute block operations to complete 2441 /* Wait for check parity and compute block operations to complete
2428 * before write-back. If a failure occurred while the check operation 2442 * before write-back. If a failure occurred while the check operation
2429 * was in flight we need to cycle this stripe through handle_stripe 2443 * was in flight we need to cycle this stripe through handle_stripe
@@ -2634,6 +2648,7 @@ static void handle_stripe5(struct stripe_head *sh)
2634 struct r5dev *dev; 2648 struct r5dev *dev;
2635 unsigned long pending = 0; 2649 unsigned long pending = 0;
2636 mdk_rdev_t *blocked_rdev = NULL; 2650 mdk_rdev_t *blocked_rdev = NULL;
2651 int prexor;
2637 2652
2638 memset(&s, 0, sizeof(s)); 2653 memset(&s, 0, sizeof(s));
2639 pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d " 2654 pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d "
@@ -2763,9 +2778,11 @@ static void handle_stripe5(struct stripe_head *sh)
2763 /* leave prexor set until postxor is done, allows us to distinguish 2778 /* leave prexor set until postxor is done, allows us to distinguish
2764 * a rmw from a rcw during biodrain 2779 * a rmw from a rcw during biodrain
2765 */ 2780 */
2781 prexor = 0;
2766 if (test_bit(STRIPE_OP_PREXOR, &sh->ops.complete) && 2782 if (test_bit(STRIPE_OP_PREXOR, &sh->ops.complete) &&
2767 test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) { 2783 test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) {
2768 2784
2785 prexor = 1;
2769 clear_bit(STRIPE_OP_PREXOR, &sh->ops.complete); 2786 clear_bit(STRIPE_OP_PREXOR, &sh->ops.complete);
2770 clear_bit(STRIPE_OP_PREXOR, &sh->ops.ack); 2787 clear_bit(STRIPE_OP_PREXOR, &sh->ops.ack);
2771 clear_bit(STRIPE_OP_PREXOR, &sh->ops.pending); 2788 clear_bit(STRIPE_OP_PREXOR, &sh->ops.pending);
@@ -2799,6 +2816,8 @@ static void handle_stripe5(struct stripe_head *sh)
2799 if (!test_and_set_bit( 2816 if (!test_and_set_bit(
2800 STRIPE_OP_IO, &sh->ops.pending)) 2817 STRIPE_OP_IO, &sh->ops.pending))
2801 sh->ops.count++; 2818 sh->ops.count++;
2819 if (prexor)
2820 continue;
2802 if (!test_bit(R5_Insync, &dev->flags) || 2821 if (!test_bit(R5_Insync, &dev->flags) ||
2803 (i == sh->pd_idx && s.failed == 0)) 2822 (i == sh->pd_idx && s.failed == 0))
2804 set_bit(STRIPE_INSYNC, &sh->state); 2823 set_bit(STRIPE_INSYNC, &sh->state);
@@ -4256,6 +4275,7 @@ static int run(mddev_t *mddev)
4256 goto abort; 4275 goto abort;
4257 } 4276 }
4258 spin_lock_init(&conf->device_lock); 4277 spin_lock_init(&conf->device_lock);
4278 mddev->queue->queue_lock = &conf->device_lock;
4259 init_waitqueue_head(&conf->wait_for_stripe); 4279 init_waitqueue_head(&conf->wait_for_stripe);
4260 init_waitqueue_head(&conf->wait_for_overlap); 4280 init_waitqueue_head(&conf->wait_for_overlap);
4261 INIT_LIST_HEAD(&conf->handle_list); 4281 INIT_LIST_HEAD(&conf->handle_list);
@@ -4562,6 +4582,14 @@ static int raid5_remove_disk(mddev_t *mddev, int number)
4562 err = -EBUSY; 4582 err = -EBUSY;
4563 goto abort; 4583 goto abort;
4564 } 4584 }
4585 /* Only remove non-faulty devices if recovery
4586 * isn't possible.
4587 */
4588 if (!test_bit(Faulty, &rdev->flags) &&
4589 mddev->degraded <= conf->max_degraded) {
4590 err = -EBUSY;
4591 goto abort;
4592 }
4565 p->rdev = NULL; 4593 p->rdev = NULL;
4566 synchronize_rcu(); 4594 synchronize_rcu();
4567 if (atomic_read(&rdev->nr_pending)) { 4595 if (atomic_read(&rdev->nr_pending)) {