diff options
Diffstat (limited to 'drivers/md/raid10.c')
-rw-r--r-- | drivers/md/raid10.c | 74 |
1 files changed, 74 insertions, 0 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 670449f7411f..5c71a462c120 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -1432,6 +1432,43 @@ static void recovery_request_write(mddev_t *mddev, r10bio_t *r10_bio) | |||
1432 | 1432 | ||
1433 | 1433 | ||
1434 | /* | 1434 | /* |
1435 | * Used by fix_read_error() to decay the per rdev read_errors. | ||
1436 | * We halve the read error count for every hour that has elapsed | ||
1437 | * since the last recorded read error. | ||
1438 | * | ||
1439 | */ | ||
1440 | static void check_decay_read_errors(mddev_t *mddev, mdk_rdev_t *rdev) | ||
1441 | { | ||
1442 | struct timespec cur_time_mon; | ||
1443 | unsigned long hours_since_last; | ||
1444 | unsigned int read_errors = atomic_read(&rdev->read_errors); | ||
1445 | |||
1446 | ktime_get_ts(&cur_time_mon); | ||
1447 | |||
1448 | if (rdev->last_read_error.tv_sec == 0 && | ||
1449 | rdev->last_read_error.tv_nsec == 0) { | ||
1450 | /* first time we've seen a read error */ | ||
1451 | rdev->last_read_error = cur_time_mon; | ||
1452 | return; | ||
1453 | } | ||
1454 | |||
1455 | hours_since_last = (cur_time_mon.tv_sec - | ||
1456 | rdev->last_read_error.tv_sec) / 3600; | ||
1457 | |||
1458 | rdev->last_read_error = cur_time_mon; | ||
1459 | |||
1460 | /* | ||
1461 | * if hours_since_last is > the number of bits in read_errors | ||
1462 | * just set read errors to 0. We do this to avoid | ||
1463 | * overflowing the shift of read_errors by hours_since_last. | ||
1464 | */ | ||
1465 | if (hours_since_last >= 8 * sizeof(read_errors)) | ||
1466 | atomic_set(&rdev->read_errors, 0); | ||
1467 | else | ||
1468 | atomic_set(&rdev->read_errors, read_errors >> hours_since_last); | ||
1469 | } | ||
1470 | |||
1471 | /* | ||
1435 | * This is a kernel thread which: | 1472 | * This is a kernel thread which: |
1436 | * | 1473 | * |
1437 | * 1. Retries failed read operations on working mirrors. | 1474 | * 1. Retries failed read operations on working mirrors. |
@@ -1444,6 +1481,43 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) | |||
1444 | int sect = 0; /* Offset from r10_bio->sector */ | 1481 | int sect = 0; /* Offset from r10_bio->sector */ |
1445 | int sectors = r10_bio->sectors; | 1482 | int sectors = r10_bio->sectors; |
1446 | mdk_rdev_t*rdev; | 1483 | mdk_rdev_t*rdev; |
1484 | int max_read_errors = atomic_read(&mddev->max_corr_read_errors); | ||
1485 | |||
1486 | rcu_read_lock(); | ||
1487 | { | ||
1488 | int d = r10_bio->devs[r10_bio->read_slot].devnum; | ||
1489 | char b[BDEVNAME_SIZE]; | ||
1490 | int cur_read_error_count = 0; | ||
1491 | |||
1492 | rdev = rcu_dereference(conf->mirrors[d].rdev); | ||
1493 | bdevname(rdev->bdev, b); | ||
1494 | |||
1495 | if (test_bit(Faulty, &rdev->flags)) { | ||
1496 | rcu_read_unlock(); | ||
1497 | /* drive has already been failed, just ignore any | ||
1498 | more fix_read_error() attempts */ | ||
1499 | return; | ||
1500 | } | ||
1501 | |||
1502 | check_decay_read_errors(mddev, rdev); | ||
1503 | atomic_inc(&rdev->read_errors); | ||
1504 | cur_read_error_count = atomic_read(&rdev->read_errors); | ||
1505 | if (cur_read_error_count > max_read_errors) { | ||
1506 | rcu_read_unlock(); | ||
1507 | printk(KERN_NOTICE | ||
1508 | "raid10: %s: Raid device exceeded " | ||
1509 | "read_error threshold " | ||
1510 | "[cur %d:max %d]\n", | ||
1511 | b, cur_read_error_count, max_read_errors); | ||
1512 | printk(KERN_NOTICE | ||
1513 | "raid10: %s: Failing raid " | ||
1514 | "device\n", b); | ||
1515 | md_error(mddev, conf->mirrors[d].rdev); | ||
1516 | return; | ||
1517 | } | ||
1518 | } | ||
1519 | rcu_read_unlock(); | ||
1520 | |||
1447 | while(sectors) { | 1521 | while(sectors) { |
1448 | int s = sectors; | 1522 | int s = sectors; |
1449 | int sl = r10_bio->read_slot; | 1523 | int sl = r10_bio->read_slot; |