diff options
-rw-r--r-- | drivers/md/md.c | 34 | ||||
-rw-r--r-- | drivers/md/md.h | 4 | ||||
-rw-r--r-- | drivers/md/raid10.c | 74 |
3 files changed, 112 insertions, 0 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index 859edbf8c9b0..f1b905a20133 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -68,6 +68,12 @@ static DECLARE_WAIT_QUEUE_HEAD(resync_wait); | |||
68 | #define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); } | 68 | #define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); } |
69 | 69 | ||
70 | /* | 70 | /* |
71 | * Default number of read corrections we'll attempt on an rdev | ||
72 | * before ejecting it from the array. We divide the read error | ||
73 | * count by 2 for every hour elapsed between read errors. | ||
74 | */ | ||
75 | #define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20 | ||
76 | /* | ||
71 | * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit' | 77 | * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit' |
72 | * is 1000 KB/sec, so the extra system load does not show up that much. | 78 | * is 1000 KB/sec, so the extra system load does not show up that much. |
73 | * Increase it if you want to have more _guaranteed_ speed. Note that | 79 | * Increase it if you want to have more _guaranteed_ speed. Note that |
@@ -2653,6 +2659,8 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi | |||
2653 | rdev->flags = 0; | 2659 | rdev->flags = 0; |
2654 | rdev->data_offset = 0; | 2660 | rdev->data_offset = 0; |
2655 | rdev->sb_events = 0; | 2661 | rdev->sb_events = 0; |
2662 | rdev->last_read_error.tv_sec = 0; | ||
2663 | rdev->last_read_error.tv_nsec = 0; | ||
2656 | atomic_set(&rdev->nr_pending, 0); | 2664 | atomic_set(&rdev->nr_pending, 0); |
2657 | atomic_set(&rdev->read_errors, 0); | 2665 | atomic_set(&rdev->read_errors, 0); |
2658 | atomic_set(&rdev->corrected_errors, 0); | 2666 | atomic_set(&rdev->corrected_errors, 0); |
@@ -3290,6 +3298,29 @@ static struct md_sysfs_entry md_array_state = | |||
3290 | __ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store); | 3298 | __ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store); |
3291 | 3299 | ||
3292 | static ssize_t | 3300 | static ssize_t |
3301 | max_corrected_read_errors_show(mddev_t *mddev, char *page) { | ||
3302 | return sprintf(page, "%d\n", | ||
3303 | atomic_read(&mddev->max_corr_read_errors)); | ||
3304 | } | ||
3305 | |||
3306 | static ssize_t | ||
3307 | max_corrected_read_errors_store(mddev_t *mddev, const char *buf, size_t len) | ||
3308 | { | ||
3309 | char *e; | ||
3310 | unsigned long n = simple_strtoul(buf, &e, 10); | ||
3311 | |||
3312 | if (*buf && (*e == 0 || *e == '\n')) { | ||
3313 | atomic_set(&mddev->max_corr_read_errors, n); | ||
3314 | return len; | ||
3315 | } | ||
3316 | return -EINVAL; | ||
3317 | } | ||
3318 | |||
3319 | static struct md_sysfs_entry max_corr_read_errors = | ||
3320 | __ATTR(max_read_errors, S_IRUGO|S_IWUSR, max_corrected_read_errors_show, | ||
3321 | max_corrected_read_errors_store); | ||
3322 | |||
3323 | static ssize_t | ||
3293 | null_show(mddev_t *mddev, char *page) | 3324 | null_show(mddev_t *mddev, char *page) |
3294 | { | 3325 | { |
3295 | return -EINVAL; | 3326 | return -EINVAL; |
@@ -3914,6 +3945,7 @@ static struct attribute *md_default_attrs[] = { | |||
3914 | &md_array_state.attr, | 3945 | &md_array_state.attr, |
3915 | &md_reshape_position.attr, | 3946 | &md_reshape_position.attr, |
3916 | &md_array_size.attr, | 3947 | &md_array_size.attr, |
3948 | &max_corr_read_errors.attr, | ||
3917 | NULL, | 3949 | NULL, |
3918 | }; | 3950 | }; |
3919 | 3951 | ||
@@ -4333,6 +4365,8 @@ static int do_md_run(mddev_t * mddev) | |||
4333 | mddev->ro = 0; | 4365 | mddev->ro = 0; |
4334 | 4366 | ||
4335 | atomic_set(&mddev->writes_pending,0); | 4367 | atomic_set(&mddev->writes_pending,0); |
4368 | atomic_set(&mddev->max_corr_read_errors, | ||
4369 | MD_DEFAULT_MAX_CORRECTED_READ_ERRORS); | ||
4336 | mddev->safemode = 0; | 4370 | mddev->safemode = 0; |
4337 | mddev->safemode_timer.function = md_safemode_timeout; | 4371 | mddev->safemode_timer.function = md_safemode_timeout; |
4338 | mddev->safemode_timer.data = (unsigned long) mddev; | 4372 | mddev->safemode_timer.data = (unsigned long) mddev; |
diff --git a/drivers/md/md.h b/drivers/md/md.h index d9138885b87f..8e4c75c00d46 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h | |||
@@ -97,6 +97,9 @@ struct mdk_rdev_s | |||
97 | atomic_t read_errors; /* number of consecutive read errors that | 97 | atomic_t read_errors; /* number of consecutive read errors that |
98 | * we have tried to ignore. | 98 | * we have tried to ignore. |
99 | */ | 99 | */ |
100 | struct timespec last_read_error; /* monotonic time of our | ||
101 | * last read error | ||
102 | */ | ||
100 | atomic_t corrected_errors; /* number of corrected read errors, | 103 | atomic_t corrected_errors; /* number of corrected read errors, |
101 | * for reporting to userspace and storing | 104 | * for reporting to userspace and storing |
102 | * in superblock. | 105 | * in superblock. |
@@ -299,6 +302,7 @@ struct mddev_s | |||
299 | int external; | 302 | int external; |
300 | } bitmap_info; | 303 | } bitmap_info; |
301 | 304 | ||
305 | atomic_t max_corr_read_errors; /* max read retries */ | ||
302 | struct list_head all_mddevs; | 306 | struct list_head all_mddevs; |
303 | 307 | ||
304 | /* Generic barrier handling. | 308 | /* Generic barrier handling. |
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 670449f7411f..5c71a462c120 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -1432,6 +1432,43 @@ static void recovery_request_write(mddev_t *mddev, r10bio_t *r10_bio) | |||
1432 | 1432 | ||
1433 | 1433 | ||
1434 | /* | 1434 | /* |
1435 | * Used by fix_read_error() to decay the per rdev read_errors. | ||
1436 | * We halve the read error count for every hour that has elapsed | ||
1437 | * since the last recorded read error. | ||
1438 | * | ||
1439 | */ | ||
1440 | static void check_decay_read_errors(mddev_t *mddev, mdk_rdev_t *rdev) | ||
1441 | { | ||
1442 | struct timespec cur_time_mon; | ||
1443 | unsigned long hours_since_last; | ||
1444 | unsigned int read_errors = atomic_read(&rdev->read_errors); | ||
1445 | |||
1446 | ktime_get_ts(&cur_time_mon); | ||
1447 | |||
1448 | if (rdev->last_read_error.tv_sec == 0 && | ||
1449 | rdev->last_read_error.tv_nsec == 0) { | ||
1450 | /* first time we've seen a read error */ | ||
1451 | rdev->last_read_error = cur_time_mon; | ||
1452 | return; | ||
1453 | } | ||
1454 | |||
1455 | hours_since_last = (cur_time_mon.tv_sec - | ||
1456 | rdev->last_read_error.tv_sec) / 3600; | ||
1457 | |||
1458 | rdev->last_read_error = cur_time_mon; | ||
1459 | |||
1460 | /* | ||
1461 | * if hours_since_last is > the number of bits in read_errors | ||
1462 | * just set read errors to 0. We do this to avoid | ||
1463 | * overflowing the shift of read_errors by hours_since_last. | ||
1464 | */ | ||
1465 | if (hours_since_last >= 8 * sizeof(read_errors)) | ||
1466 | atomic_set(&rdev->read_errors, 0); | ||
1467 | else | ||
1468 | atomic_set(&rdev->read_errors, read_errors >> hours_since_last); | ||
1469 | } | ||
1470 | |||
1471 | /* | ||
1435 | * This is a kernel thread which: | 1472 | * This is a kernel thread which: |
1436 | * | 1473 | * |
1437 | * 1. Retries failed read operations on working mirrors. | 1474 | * 1. Retries failed read operations on working mirrors. |
@@ -1444,6 +1481,43 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) | |||
1444 | int sect = 0; /* Offset from r10_bio->sector */ | 1481 | int sect = 0; /* Offset from r10_bio->sector */ |
1445 | int sectors = r10_bio->sectors; | 1482 | int sectors = r10_bio->sectors; |
1446 | mdk_rdev_t*rdev; | 1483 | mdk_rdev_t*rdev; |
1484 | int max_read_errors = atomic_read(&mddev->max_corr_read_errors); | ||
1485 | |||
1486 | rcu_read_lock(); | ||
1487 | { | ||
1488 | int d = r10_bio->devs[r10_bio->read_slot].devnum; | ||
1489 | char b[BDEVNAME_SIZE]; | ||
1490 | int cur_read_error_count = 0; | ||
1491 | |||
1492 | rdev = rcu_dereference(conf->mirrors[d].rdev); | ||
1493 | bdevname(rdev->bdev, b); | ||
1494 | |||
1495 | if (test_bit(Faulty, &rdev->flags)) { | ||
1496 | rcu_read_unlock(); | ||
1497 | /* drive has already been failed, just ignore any | ||
1498 | more fix_read_error() attempts */ | ||
1499 | return; | ||
1500 | } | ||
1501 | |||
1502 | check_decay_read_errors(mddev, rdev); | ||
1503 | atomic_inc(&rdev->read_errors); | ||
1504 | cur_read_error_count = atomic_read(&rdev->read_errors); | ||
1505 | if (cur_read_error_count > max_read_errors) { | ||
1506 | rcu_read_unlock(); | ||
1507 | printk(KERN_NOTICE | ||
1508 | "raid10: %s: Raid device exceeded " | ||
1509 | "read_error threshold " | ||
1510 | "[cur %d:max %d]\n", | ||
1511 | b, cur_read_error_count, max_read_errors); | ||
1512 | printk(KERN_NOTICE | ||
1513 | "raid10: %s: Failing raid " | ||
1514 | "device\n", b); | ||
1515 | md_error(mddev, conf->mirrors[d].rdev); | ||
1516 | return; | ||
1517 | } | ||
1518 | } | ||
1519 | rcu_read_unlock(); | ||
1520 | |||
1447 | while(sectors) { | 1521 | while(sectors) { |
1448 | int s = sectors; | 1522 | int s = sectors; |
1449 | int sl = r10_bio->read_slot; | 1523 | int sl = r10_bio->read_slot; |