aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/raid10.c
diff options
context:
space:
mode:
author NeilBrown <neilb@suse.de> 2011-05-11 00:53:17 -0400
committer NeilBrown <neilb@suse.de> 2011-05-11 00:53:17 -0400
commit 7c4e06ff2b6a4c09638551dfde76f37f9fca5c0c (patch)
tree 15ae0b698fc2d5347a776215c3bb6faaf45b2b41 /drivers/md/raid10.c
parent af6d7b760c7547c1a410a333bdb24daed24e1043 (diff)
md/raid10: some tidying up in fix_read_error
Currently the rdev on which a read error happened could be removed before we perform the fix_read_error handling. This requires extra tests for NULL.

So delay the rdev_dec_pending call until after the call to fix_read_error so that we can be sure that the rdev still exists.

This allows an 'if' clause to be removed so the body gets re-indented back one level.

Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers/md/raid10.c')
-rw-r--r-- drivers/md/raid10.c 74
1 files changed, 34 insertions, 40 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 8ea0acad606b..8e4f469a75b0 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -271,9 +271,10 @@ static void raid10_end_read_request(struct bio *bio, int error)
271 */ 271 */
272 set_bit(R10BIO_Uptodate, &r10_bio->state); 272 set_bit(R10BIO_Uptodate, &r10_bio->state);
273 raid_end_bio_io(r10_bio); 273 raid_end_bio_io(r10_bio);
274 rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
274 } else { 275 } else {
275 /* 276 /*
276 * oops, read error: 277 * oops, read error - keep the refcount on the rdev
277 */ 278 */
278 char b[BDEVNAME_SIZE]; 279 char b[BDEVNAME_SIZE];
279 if (printk_ratelimit()) 280 if (printk_ratelimit())
@@ -282,8 +283,6 @@ static void raid10_end_read_request(struct bio *bio, int error)
282 bdevname(conf->mirrors[dev].rdev->bdev,b), (unsigned long long)r10_bio->sector); 283 bdevname(conf->mirrors[dev].rdev->bdev,b), (unsigned long long)r10_bio->sector);
283 reschedule_retry(r10_bio); 284 reschedule_retry(r10_bio);
284 } 285 }
285
286 rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
287} 286}
288 287
289static void raid10_end_write_request(struct bio *bio, int error) 288static void raid10_end_write_request(struct bio *bio, int error)
@@ -1438,40 +1437,33 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
1438 int max_read_errors = atomic_read(&mddev->max_corr_read_errors); 1437 int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
1439 int d = r10_bio->devs[r10_bio->read_slot].devnum; 1438 int d = r10_bio->devs[r10_bio->read_slot].devnum;
1440 1439
1441 rcu_read_lock(); 1440 /* still own a reference to this rdev, so it cannot
1442 rdev = rcu_dereference(conf->mirrors[d].rdev); 1441 * have been cleared recently.
1443 if (rdev) { /* If rdev is not NULL */ 1442 */
1444 char b[BDEVNAME_SIZE]; 1443 rdev = conf->mirrors[d].rdev;
1445 int cur_read_error_count = 0;
1446 1444
1447 bdevname(rdev->bdev, b); 1445 if (test_bit(Faulty, &rdev->flags))
1446 /* drive has already been failed, just ignore any
1447 more fix_read_error() attempts */
1448 return;
1448 1449
1449 if (test_bit(Faulty, &rdev->flags)) { 1450 check_decay_read_errors(mddev, rdev);
1450 rcu_read_unlock(); 1451 atomic_inc(&rdev->read_errors);
1451 /* drive has already been failed, just ignore any 1452 if (atomic_read(&rdev->read_errors) > max_read_errors) {
1452 more fix_read_error() attempts */ 1453 char b[BDEVNAME_SIZE];
1453 return; 1454 bdevname(rdev->bdev, b);
1454 }
1455 1455
1456 check_decay_read_errors(mddev, rdev); 1456 printk(KERN_NOTICE
1457 atomic_inc(&rdev->read_errors); 1457 "md/raid10:%s: %s: Raid device exceeded "
1458 cur_read_error_count = atomic_read(&rdev->read_errors); 1458 "read_error threshold [cur %d:max %d]\n",
1459 if (cur_read_error_count > max_read_errors) { 1459 mdname(mddev), b,
1460 rcu_read_unlock(); 1460 atomic_read(&rdev->read_errors), max_read_errors);
1461 printk(KERN_NOTICE 1461 printk(KERN_NOTICE
1462 "md/raid10:%s: %s: Raid device exceeded " 1462 "md/raid10:%s: %s: Failing raid device\n",
1463 "read_error threshold " 1463 mdname(mddev), b);
1464 "[cur %d:max %d]\n", 1464 md_error(mddev, conf->mirrors[d].rdev);
1465 mdname(mddev), 1465 return;
1466 b, cur_read_error_count, max_read_errors);
1467 printk(KERN_NOTICE
1468 "md/raid10:%s: %s: Failing raid "
1469 "device\n", mdname(mddev), b);
1470 md_error(mddev, conf->mirrors[d].rdev);
1471 return;
1472 }
1473 } 1466 }
1474 rcu_read_unlock();
1475 1467
1476 while(sectors) { 1468 while(sectors) {
1477 int s = sectors; 1469 int s = sectors;
@@ -1540,8 +1532,8 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
1540 "write failed" 1532 "write failed"
1541 " (%d sectors at %llu on %s)\n", 1533 " (%d sectors at %llu on %s)\n",
1542 mdname(mddev), s, 1534 mdname(mddev), s,
1543 (unsigned long long)(sect+ 1535 (unsigned long long)(
1544 rdev->data_offset), 1536 sect + rdev->data_offset),
1545 bdevname(rdev->bdev, b)); 1537 bdevname(rdev->bdev, b));
1546 printk(KERN_NOTICE "md/raid10:%s: %s: failing " 1538 printk(KERN_NOTICE "md/raid10:%s: %s: failing "
1547 "drive\n", 1539 "drive\n",
@@ -1577,8 +1569,8 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
1577 "corrected sectors" 1569 "corrected sectors"
1578 " (%d sectors at %llu on %s)\n", 1570 " (%d sectors at %llu on %s)\n",
1579 mdname(mddev), s, 1571 mdname(mddev), s,
1580 (unsigned long long)(sect+ 1572 (unsigned long long)(
1581 rdev->data_offset), 1573 sect + rdev->data_offset),
1582 bdevname(rdev->bdev, b)); 1574 bdevname(rdev->bdev, b));
1583 printk(KERN_NOTICE "md/raid10:%s: %s: failing drive\n", 1575 printk(KERN_NOTICE "md/raid10:%s: %s: failing drive\n",
1584 mdname(mddev), 1576 mdname(mddev),
@@ -1590,8 +1582,8 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
1590 "md/raid10:%s: read error corrected" 1582 "md/raid10:%s: read error corrected"
1591 " (%d sectors at %llu on %s)\n", 1583 " (%d sectors at %llu on %s)\n",
1592 mdname(mddev), s, 1584 mdname(mddev), s,
1593 (unsigned long long)(sect+ 1585 (unsigned long long)(
1594 rdev->data_offset), 1586 sect + rdev->data_offset),
1595 bdevname(rdev->bdev, b)); 1587 bdevname(rdev->bdev, b));
1596 } 1588 }
1597 1589
@@ -1641,7 +1633,8 @@ static void raid10d(mddev_t *mddev)
1641 else if (test_bit(R10BIO_IsRecover, &r10_bio->state)) 1633 else if (test_bit(R10BIO_IsRecover, &r10_bio->state))
1642 recovery_request_write(mddev, r10_bio); 1634 recovery_request_write(mddev, r10_bio);
1643 else { 1635 else {
1644 int mirror; 1636 int slot = r10_bio->read_slot;
1637 int mirror = r10_bio->devs[slot].devnum;
1645 /* we got a read error. Maybe the drive is bad. Maybe just 1638 /* we got a read error. Maybe the drive is bad. Maybe just
1646 * the block and we can fix it. 1639 * the block and we can fix it.
1647 * We freeze all other IO, and try reading the block from 1640 * We freeze all other IO, and try reading the block from
@@ -1655,6 +1648,7 @@ static void raid10d(mddev_t *mddev)
1655 fix_read_error(conf, mddev, r10_bio); 1648 fix_read_error(conf, mddev, r10_bio);
1656 unfreeze_array(conf); 1649 unfreeze_array(conf);
1657 } 1650 }
1651 rdev_dec_pending(conf->mirrors[mirror].rdev, mddev);
1658 1652
1659 bio = r10_bio->devs[r10_bio->read_slot].bio; 1653 bio = r10_bio->devs[r10_bio->read_slot].bio;
1660 r10_bio->devs[r10_bio->read_slot].bio = 1654 r10_bio->devs[r10_bio->read_slot].bio =