aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/raid10.c
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2009-12-14 13:03:36 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2009-12-14 13:03:36 -0500
commit: 37222e1c9ee3ce587f5b41fed868bd8a592a992f (patch)
tree: b65f22a1e20286185463ca1a2889e593d963a393 /drivers/md/raid10.c
parent: 76b8f82cde2d9c13ef0c9a9aa2581b9b30b68e8c (diff)
parent: 06e3c817b750c131a20e82eed57a17841ea88ed2 (diff)
Merge branch 'for-linus' of git://neil.brown.name/md
* 'for-linus' of git://neil.brown.name/md: (27 commits)
  md: add 'recovery_start' per-device sysfs attribute
  md: rcu_read_lock() walk of mddev->disks in md_do_sync()
  md: integrate spares into array at earliest opportunity.
  md: move compat_ioctl handling into md.c
  md: revise Kconfig help for MD_MULTIPATH
  md: add MODULE_DESCRIPTION for all md related modules.
  raid: improve MD/raid10 handling of correctable read errors.
  md/raid10: print more useful messages on device failure.
  md/bitmap: update dirty flag when bitmap bits are explicitly set.
  md: Support write-intent bitmaps with externally managed metadata.
  md/bitmap: move setting of daemon_lastrun out of bitmap_read_sb
  md: support updating bitmap parameters via sysfs.
  md: factor out parsing of fixed-point numbers
  md: support bitmap offset appropriate for external-metadata arrays.
  md: remove needless setting of thread->timeout in raid10_quiesce
  md: change daemon_sleep to be in 'jiffies' rather than 'seconds'.
  md: move offset, daemon_sleep and chunksize out of bitmap structure
  md: collect bitmap-specific fields into one structure.
  md/raid1: add takeover support for raid5->raid1
  md: add honouring of suspend_{lo,hi} to raid1.
  ...
Diffstat (limited to 'drivers/md/raid10.c')
-rw-r--r-- drivers/md/raid10.c | 116
1 file changed, 105 insertions, 11 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index c2cb7b87b440..d119b7b75e71 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -804,7 +804,7 @@ static int make_request(struct request_queue *q, struct bio * bio)
804 mdk_rdev_t *blocked_rdev; 804 mdk_rdev_t *blocked_rdev;
805 805
806 if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) { 806 if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
807 bio_endio(bio, -EOPNOTSUPP); 807 md_barrier_request(mddev, bio);
808 return 0; 808 return 0;
809 } 809 }
810 810
@@ -1432,6 +1432,43 @@ static void recovery_request_write(mddev_t *mddev, r10bio_t *r10_bio)
1432 1432
1433 1433
1434/* 1434/*
1435 * Used by fix_read_error() to decay the per-rdev read_errors counter.
1436 * The count is halved once for every whole hour that has elapsed
1437 * since the last recorded read error (elapsed seconds / 3600).
1438 * The first error on an rdev only records the time; mddev is not used here.
1439 */
1440static void check_decay_read_errors(mddev_t *mddev, mdk_rdev_t *rdev)
1441{
1442 struct timespec cur_time_mon;
1443 unsigned long hours_since_last;
1444 unsigned int read_errors = atomic_read(&rdev->read_errors);
1445
1446 ktime_get_ts(&cur_time_mon);
1447
1448 if (rdev->last_read_error.tv_sec == 0 &&
1449 rdev->last_read_error.tv_nsec == 0) {
1450 /* first read error seen on this rdev: record the time, apply no decay */
1451 rdev->last_read_error = cur_time_mon;
1452 return;
1453 }
1454
1455 hours_since_last = (cur_time_mon.tv_sec -
1456 rdev->last_read_error.tv_sec) / 3600;
1457
1458 rdev->last_read_error = cur_time_mon;
1459
1460 /*
1461 * if hours_since_last is >= the number of bits in read_errors
1462 * just set read errors to 0. We do this to avoid shifting
1463 * read_errors by its full width or more, which is undefined in C.
1464 */
1465 if (hours_since_last >= 8 * sizeof(read_errors))
1466 atomic_set(&rdev->read_errors, 0);
1467 else
1468 atomic_set(&rdev->read_errors, read_errors >> hours_since_last);
1469}
1470
1471/*
1435 * This is a kernel thread which: 1472 * This is a kernel thread which:
1436 * 1473 *
1437 * 1. Retries failed read operations on working mirrors. 1474 * 1. Retries failed read operations on working mirrors.
@@ -1444,6 +1481,43 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
1444 int sect = 0; /* Offset from r10_bio->sector */ 1481 int sect = 0; /* Offset from r10_bio->sector */
1445 int sectors = r10_bio->sectors; 1482 int sectors = r10_bio->sectors;
1446 mdk_rdev_t*rdev; 1483 mdk_rdev_t*rdev;
1484 int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
1485
1486 rcu_read_lock();
1487 {
1488 int d = r10_bio->devs[r10_bio->read_slot].devnum;
1489 char b[BDEVNAME_SIZE];
1490 int cur_read_error_count = 0;
1491
1492 rdev = rcu_dereference(conf->mirrors[d].rdev);
1493 bdevname(rdev->bdev, b);
1494
1495 if (test_bit(Faulty, &rdev->flags)) {
1496 rcu_read_unlock();
1497 /* drive has already been failed, just ignore any
1498 more fix_read_error() attempts */
1499 return;
1500 }
1501
1502 check_decay_read_errors(mddev, rdev);
1503 atomic_inc(&rdev->read_errors);
1504 cur_read_error_count = atomic_read(&rdev->read_errors);
1505 if (cur_read_error_count > max_read_errors) {
1506 rcu_read_unlock();
1507 printk(KERN_NOTICE
1508 "raid10: %s: Raid device exceeded "
1509 "read_error threshold "
1510 "[cur %d:max %d]\n",
1511 b, cur_read_error_count, max_read_errors);
1512 printk(KERN_NOTICE
1513 "raid10: %s: Failing raid "
1514 "device\n", b);
1515 md_error(mddev, conf->mirrors[d].rdev);
1516 return;
1517 }
1518 }
1519 rcu_read_unlock();
1520
1447 while(sectors) { 1521 while(sectors) {
1448 int s = sectors; 1522 int s = sectors;
1449 int sl = r10_bio->read_slot; 1523 int sl = r10_bio->read_slot;
@@ -1488,6 +1562,7 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
1488 /* write it back and re-read */ 1562 /* write it back and re-read */
1489 rcu_read_lock(); 1563 rcu_read_lock();
1490 while (sl != r10_bio->read_slot) { 1564 while (sl != r10_bio->read_slot) {
1565 char b[BDEVNAME_SIZE];
1491 int d; 1566 int d;
1492 if (sl==0) 1567 if (sl==0)
1493 sl = conf->copies; 1568 sl = conf->copies;
@@ -1503,9 +1578,21 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
1503 r10_bio->devs[sl].addr + 1578 r10_bio->devs[sl].addr +
1504 sect + rdev->data_offset, 1579 sect + rdev->data_offset,
1505 s<<9, conf->tmppage, WRITE) 1580 s<<9, conf->tmppage, WRITE)
1506 == 0) 1581 == 0) {
1507 /* Well, this device is dead */ 1582 /* Well, this device is dead */
1583 printk(KERN_NOTICE
1584 "raid10:%s: read correction "
1585 "write failed"
1586 " (%d sectors at %llu on %s)\n",
1587 mdname(mddev), s,
1588 (unsigned long long)(sect+
1589 rdev->data_offset),
1590 bdevname(rdev->bdev, b));
1591 printk(KERN_NOTICE "raid10:%s: failing "
1592 "drive\n",
1593 bdevname(rdev->bdev, b));
1508 md_error(mddev, rdev); 1594 md_error(mddev, rdev);
1595 }
1509 rdev_dec_pending(rdev, mddev); 1596 rdev_dec_pending(rdev, mddev);
1510 rcu_read_lock(); 1597 rcu_read_lock();
1511 } 1598 }
@@ -1526,10 +1613,22 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
1526 if (sync_page_io(rdev->bdev, 1613 if (sync_page_io(rdev->bdev,
1527 r10_bio->devs[sl].addr + 1614 r10_bio->devs[sl].addr +
1528 sect + rdev->data_offset, 1615 sect + rdev->data_offset,
1529 s<<9, conf->tmppage, READ) == 0) 1616 s<<9, conf->tmppage,
1617 READ) == 0) {
1530 /* Well, this device is dead */ 1618 /* Well, this device is dead */
1619 printk(KERN_NOTICE
1620 "raid10:%s: unable to read back "
1621 "corrected sectors"
1622 " (%d sectors at %llu on %s)\n",
1623 mdname(mddev), s,
1624 (unsigned long long)(sect+
1625 rdev->data_offset),
1626 bdevname(rdev->bdev, b));
1627 printk(KERN_NOTICE "raid10:%s: failing drive\n",
1628 bdevname(rdev->bdev, b));
1629
1531 md_error(mddev, rdev); 1630 md_error(mddev, rdev);
1532 else 1631 } else {
1533 printk(KERN_INFO 1632 printk(KERN_INFO
1534 "raid10:%s: read error corrected" 1633 "raid10:%s: read error corrected"
1535 " (%d sectors at %llu on %s)\n", 1634 " (%d sectors at %llu on %s)\n",
@@ -1537,6 +1636,7 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
1537 (unsigned long long)(sect+ 1636 (unsigned long long)(sect+
1538 rdev->data_offset), 1637 rdev->data_offset),
1539 bdevname(rdev->bdev, b)); 1638 bdevname(rdev->bdev, b));
1639 }
1540 1640
1541 rdev_dec_pending(rdev, mddev); 1641 rdev_dec_pending(rdev, mddev);
1542 rcu_read_lock(); 1642 rcu_read_lock();
@@ -2275,13 +2375,6 @@ static void raid10_quiesce(mddev_t *mddev, int state)
2275 lower_barrier(conf); 2375 lower_barrier(conf);
2276 break; 2376 break;
2277 } 2377 }
2278 if (mddev->thread) {
2279 if (mddev->bitmap)
2280 mddev->thread->timeout = mddev->bitmap->daemon_sleep * HZ;
2281 else
2282 mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
2283 md_wakeup_thread(mddev->thread);
2284 }
2285} 2378}
2286 2379
2287static struct mdk_personality raid10_personality = 2380static struct mdk_personality raid10_personality =
@@ -2315,6 +2408,7 @@ static void raid_exit(void)
2315module_init(raid_init); 2408module_init(raid_init);
2316module_exit(raid_exit); 2409module_exit(raid_exit);
2317MODULE_LICENSE("GPL"); 2410MODULE_LICENSE("GPL");
2411MODULE_DESCRIPTION("RAID10 (striped mirror) personality for MD");
2318MODULE_ALIAS("md-personality-9"); /* RAID10 */ 2412MODULE_ALIAS("md-personality-9"); /* RAID10 */
2319MODULE_ALIAS("md-raid10"); 2413MODULE_ALIAS("md-raid10");
2320MODULE_ALIAS("md-level-10"); 2414MODULE_ALIAS("md-level-10");