diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-12-14 13:03:36 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-12-14 13:03:36 -0500 |
commit | 37222e1c9ee3ce587f5b41fed868bd8a592a992f (patch) | |
tree | b65f22a1e20286185463ca1a2889e593d963a393 /drivers/md/raid10.c | |
parent | 76b8f82cde2d9c13ef0c9a9aa2581b9b30b68e8c (diff) | |
parent | 06e3c817b750c131a20e82eed57a17841ea88ed2 (diff) |
Merge branch 'for-linus' of git://neil.brown.name/md
* 'for-linus' of git://neil.brown.name/md: (27 commits)
md: add 'recovery_start' per-device sysfs attribute
md: rcu_read_lock() walk of mddev->disks in md_do_sync()
md: integrate spares into array at earliest opportunity.
md: move compat_ioctl handling into md.c
md: revise Kconfig help for MD_MULTIPATH
md: add MODULE_DESCRIPTION for all md related modules.
raid: improve MD/raid10 handling of correctable read errors.
md/raid10: print more useful messages on device failure.
md/bitmap: update dirty flag when bitmap bits are explicitly set.
md: Support write-intent bitmaps with externally managed metadata.
md/bitmap: move setting of daemon_lastrun out of bitmap_read_sb
md: support updating bitmap parameters via sysfs.
md: factor out parsing of fixed-point numbers
md: support bitmap offset appropriate for external-metadata arrays.
md: remove needless setting of thread->timeout in raid10_quiesce
md: change daemon_sleep to be in 'jiffies' rather than 'seconds'.
md: move offset, daemon_sleep and chunksize out of bitmap structure
md: collect bitmap-specific fields into one structure.
md/raid1: add takeover support for raid5->raid1
md: add honouring of suspend_{lo,hi} to raid1.
...
Diffstat (limited to 'drivers/md/raid10.c')
-rw-r--r-- | drivers/md/raid10.c | 116 |
1 files changed, 105 insertions, 11 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index c2cb7b87b440..d119b7b75e71 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -804,7 +804,7 @@ static int make_request(struct request_queue *q, struct bio * bio) | |||
804 | mdk_rdev_t *blocked_rdev; | 804 | mdk_rdev_t *blocked_rdev; |
805 | 805 | ||
806 | if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) { | 806 | if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) { |
807 | bio_endio(bio, -EOPNOTSUPP); | 807 | md_barrier_request(mddev, bio); |
808 | return 0; | 808 | return 0; |
809 | } | 809 | } |
810 | 810 | ||
@@ -1432,6 +1432,43 @@ static void recovery_request_write(mddev_t *mddev, r10bio_t *r10_bio) | |||
1432 | 1432 | ||
1433 | 1433 | ||
1434 | /* | 1434 | /* |
1435 | * Used by fix_read_error() to decay the per rdev read_errors. | ||
1436 | * We halve the read error count for every hour that has elapsed | ||
1437 | * since the last recorded read error. | ||
1438 | * | ||
1439 | */ | ||
1440 | static void check_decay_read_errors(mddev_t *mddev, mdk_rdev_t *rdev) | ||
1441 | { | ||
1442 | struct timespec cur_time_mon; | ||
1443 | unsigned long hours_since_last; | ||
1444 | unsigned int read_errors = atomic_read(&rdev->read_errors); | ||
1445 | |||
1446 | ktime_get_ts(&cur_time_mon); | ||
1447 | |||
1448 | if (rdev->last_read_error.tv_sec == 0 && | ||
1449 | rdev->last_read_error.tv_nsec == 0) { | ||
1450 | /* first time we've seen a read error: record the time, nothing to decay yet */ | ||
1451 | rdev->last_read_error = cur_time_mon; | ||
1452 | return; | ||
1453 | } | ||
1454 | |||
1455 | hours_since_last = (cur_time_mon.tv_sec - | ||
1456 | rdev->last_read_error.tv_sec) / 3600; | ||
1457 | |||
1458 | rdev->last_read_error = cur_time_mon; | ||
1459 | |||
1460 | /* | ||
1461 | * if hours_since_last is >= the number of bits in read_errors | ||
1462 | * just set read errors to 0. We do this to avoid | ||
1463 | * overflowing the shift of read_errors by hours_since_last. | ||
1464 | */ | ||
1465 | if (hours_since_last >= 8 * sizeof(read_errors)) | ||
1466 | atomic_set(&rdev->read_errors, 0); | ||
1467 | else | ||
1468 | atomic_set(&rdev->read_errors, read_errors >> hours_since_last); | ||
1469 | } | ||
1470 | |||
1471 | /* | ||
1435 | * This is a kernel thread which: | 1472 | * This is a kernel thread which: |
1436 | * | 1473 | * |
1437 | * 1. Retries failed read operations on working mirrors. | 1474 | * 1. Retries failed read operations on working mirrors. |
@@ -1444,6 +1481,43 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) | |||
1444 | int sect = 0; /* Offset from r10_bio->sector */ | 1481 | int sect = 0; /* Offset from r10_bio->sector */ |
1445 | int sectors = r10_bio->sectors; | 1482 | int sectors = r10_bio->sectors; |
1446 | mdk_rdev_t*rdev; | 1483 | mdk_rdev_t*rdev; |
1484 | int max_read_errors = atomic_read(&mddev->max_corr_read_errors); | ||
1485 | |||
1486 | rcu_read_lock(); | ||
1487 | { | ||
1488 | int d = r10_bio->devs[r10_bio->read_slot].devnum; | ||
1489 | char b[BDEVNAME_SIZE]; | ||
1490 | int cur_read_error_count = 0; | ||
1491 | |||
1492 | rdev = rcu_dereference(conf->mirrors[d].rdev); | ||
1493 | bdevname(rdev->bdev, b); | ||
1494 | |||
1495 | if (test_bit(Faulty, &rdev->flags)) { | ||
1496 | rcu_read_unlock(); | ||
1497 | /* drive has already been failed, just ignore any | ||
1498 | more fix_read_error() attempts */ | ||
1499 | return; | ||
1500 | } | ||
1501 | |||
1502 | check_decay_read_errors(mddev, rdev); | ||
1503 | atomic_inc(&rdev->read_errors); | ||
1504 | cur_read_error_count = atomic_read(&rdev->read_errors); | ||
1505 | if (cur_read_error_count > max_read_errors) { | ||
1506 | rcu_read_unlock(); | ||
1507 | printk(KERN_NOTICE | ||
1508 | "raid10: %s: Raid device exceeded " | ||
1509 | "read_error threshold " | ||
1510 | "[cur %d:max %d]\n", | ||
1511 | b, cur_read_error_count, max_read_errors); | ||
1512 | printk(KERN_NOTICE | ||
1513 | "raid10: %s: Failing raid " | ||
1514 | "device\n", b); | ||
1515 | md_error(mddev, conf->mirrors[d].rdev); | ||
1516 | return; | ||
1517 | } | ||
1518 | } | ||
1519 | rcu_read_unlock(); | ||
1520 | |||
1447 | while(sectors) { | 1521 | while(sectors) { |
1448 | int s = sectors; | 1522 | int s = sectors; |
1449 | int sl = r10_bio->read_slot; | 1523 | int sl = r10_bio->read_slot; |
@@ -1488,6 +1562,7 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) | |||
1488 | /* write it back and re-read */ | 1562 | /* write it back and re-read */ |
1489 | rcu_read_lock(); | 1563 | rcu_read_lock(); |
1490 | while (sl != r10_bio->read_slot) { | 1564 | while (sl != r10_bio->read_slot) { |
1565 | char b[BDEVNAME_SIZE]; | ||
1491 | int d; | 1566 | int d; |
1492 | if (sl==0) | 1567 | if (sl==0) |
1493 | sl = conf->copies; | 1568 | sl = conf->copies; |
@@ -1503,9 +1578,21 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) | |||
1503 | r10_bio->devs[sl].addr + | 1578 | r10_bio->devs[sl].addr + |
1504 | sect + rdev->data_offset, | 1579 | sect + rdev->data_offset, |
1505 | s<<9, conf->tmppage, WRITE) | 1580 | s<<9, conf->tmppage, WRITE) |
1506 | == 0) | 1581 | == 0) { |
1507 | /* Well, this device is dead */ | 1582 | /* Well, this device is dead */ |
1583 | printk(KERN_NOTICE | ||
1584 | "raid10:%s: read correction " | ||
1585 | "write failed" | ||
1586 | " (%d sectors at %llu on %s)\n", | ||
1587 | mdname(mddev), s, | ||
1588 | (unsigned long long)(sect+ | ||
1589 | rdev->data_offset), | ||
1590 | bdevname(rdev->bdev, b)); | ||
1591 | printk(KERN_NOTICE "raid10:%s: failing " | ||
1592 | "drive\n", | ||
1593 | bdevname(rdev->bdev, b)); | ||
1508 | md_error(mddev, rdev); | 1594 | md_error(mddev, rdev); |
1595 | } | ||
1509 | rdev_dec_pending(rdev, mddev); | 1596 | rdev_dec_pending(rdev, mddev); |
1510 | rcu_read_lock(); | 1597 | rcu_read_lock(); |
1511 | } | 1598 | } |
@@ -1526,10 +1613,22 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) | |||
1526 | if (sync_page_io(rdev->bdev, | 1613 | if (sync_page_io(rdev->bdev, |
1527 | r10_bio->devs[sl].addr + | 1614 | r10_bio->devs[sl].addr + |
1528 | sect + rdev->data_offset, | 1615 | sect + rdev->data_offset, |
1529 | s<<9, conf->tmppage, READ) == 0) | 1616 | s<<9, conf->tmppage, |
1617 | READ) == 0) { | ||
1530 | /* Well, this device is dead */ | 1618 | /* Well, this device is dead */ |
1619 | printk(KERN_NOTICE | ||
1620 | "raid10:%s: unable to read back " | ||
1621 | "corrected sectors" | ||
1622 | " (%d sectors at %llu on %s)\n", | ||
1623 | mdname(mddev), s, | ||
1624 | (unsigned long long)(sect+ | ||
1625 | rdev->data_offset), | ||
1626 | bdevname(rdev->bdev, b)); | ||
1627 | printk(KERN_NOTICE "raid10:%s: failing drive\n", | ||
1628 | bdevname(rdev->bdev, b)); | ||
1629 | |||
1531 | md_error(mddev, rdev); | 1630 | md_error(mddev, rdev); |
1532 | else | 1631 | } else { |
1533 | printk(KERN_INFO | 1632 | printk(KERN_INFO |
1534 | "raid10:%s: read error corrected" | 1633 | "raid10:%s: read error corrected" |
1535 | " (%d sectors at %llu on %s)\n", | 1634 | " (%d sectors at %llu on %s)\n", |
@@ -1537,6 +1636,7 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) | |||
1537 | (unsigned long long)(sect+ | 1636 | (unsigned long long)(sect+ |
1538 | rdev->data_offset), | 1637 | rdev->data_offset), |
1539 | bdevname(rdev->bdev, b)); | 1638 | bdevname(rdev->bdev, b)); |
1639 | } | ||
1540 | 1640 | ||
1541 | rdev_dec_pending(rdev, mddev); | 1641 | rdev_dec_pending(rdev, mddev); |
1542 | rcu_read_lock(); | 1642 | rcu_read_lock(); |
@@ -2275,13 +2375,6 @@ static void raid10_quiesce(mddev_t *mddev, int state) | |||
2275 | lower_barrier(conf); | 2375 | lower_barrier(conf); |
2276 | break; | 2376 | break; |
2277 | } | 2377 | } |
2278 | if (mddev->thread) { | ||
2279 | if (mddev->bitmap) | ||
2280 | mddev->thread->timeout = mddev->bitmap->daemon_sleep * HZ; | ||
2281 | else | ||
2282 | mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT; | ||
2283 | md_wakeup_thread(mddev->thread); | ||
2284 | } | ||
2285 | } | 2378 | } |
2286 | 2379 | ||
2287 | static struct mdk_personality raid10_personality = | 2380 | static struct mdk_personality raid10_personality = |
@@ -2315,6 +2408,7 @@ static void raid_exit(void) | |||
2315 | module_init(raid_init); | 2408 | module_init(raid_init); |
2316 | module_exit(raid_exit); | 2409 | module_exit(raid_exit); |
2317 | MODULE_LICENSE("GPL"); | 2410 | MODULE_LICENSE("GPL"); |
2411 | MODULE_DESCRIPTION("RAID10 (striped mirror) personality for MD"); | ||
2318 | MODULE_ALIAS("md-personality-9"); /* RAID10 */ | 2412 | MODULE_ALIAS("md-personality-9"); /* RAID10 */ |
2319 | MODULE_ALIAS("md-raid10"); | 2413 | MODULE_ALIAS("md-raid10"); |
2320 | MODULE_ALIAS("md-level-10"); | 2414 | MODULE_ALIAS("md-level-10"); |