diff options
Diffstat (limited to 'drivers')
| -rw-r--r-- | drivers/md/md.c | 34 | ||||
| -rw-r--r-- | drivers/md/md.h | 4 | ||||
| -rw-r--r-- | drivers/md/raid10.c | 74 |
3 files changed, 112 insertions, 0 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index 859edbf8c9b0..f1b905a20133 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
| @@ -68,6 +68,12 @@ static DECLARE_WAIT_QUEUE_HEAD(resync_wait); | |||
| 68 | #define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); } | 68 | #define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); } |
| 69 | 69 | ||
| 70 | /* | 70 | /* |
| 71 | * Default number of read corrections we'll attempt on an rdev | ||
| 72 | * before ejecting it from the array. We divide the read error | ||
| 73 | * count by 2 for every hour elapsed between read errors. | ||
| 74 | */ | ||
| 75 | #define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20 | ||
| 76 | /* | ||
| 71 | * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit' | 77 | * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit' |
| 72 | * is 1000 KB/sec, so the extra system load does not show up that much. | 78 | * is 1000 KB/sec, so the extra system load does not show up that much. |
| 73 | * Increase it if you want to have more _guaranteed_ speed. Note that | 79 | * Increase it if you want to have more _guaranteed_ speed. Note that |
| @@ -2653,6 +2659,8 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi | |||
| 2653 | rdev->flags = 0; | 2659 | rdev->flags = 0; |
| 2654 | rdev->data_offset = 0; | 2660 | rdev->data_offset = 0; |
| 2655 | rdev->sb_events = 0; | 2661 | rdev->sb_events = 0; |
| 2662 | rdev->last_read_error.tv_sec = 0; | ||
| 2663 | rdev->last_read_error.tv_nsec = 0; | ||
| 2656 | atomic_set(&rdev->nr_pending, 0); | 2664 | atomic_set(&rdev->nr_pending, 0); |
| 2657 | atomic_set(&rdev->read_errors, 0); | 2665 | atomic_set(&rdev->read_errors, 0); |
| 2658 | atomic_set(&rdev->corrected_errors, 0); | 2666 | atomic_set(&rdev->corrected_errors, 0); |
| @@ -3290,6 +3298,29 @@ static struct md_sysfs_entry md_array_state = | |||
| 3290 | __ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store); | 3298 | __ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store); |
| 3291 | 3299 | ||
| 3292 | static ssize_t | 3300 | static ssize_t |
| 3301 | max_corrected_read_errors_show(mddev_t *mddev, char *page) { | ||
| 3302 | return sprintf(page, "%d\n", | ||
| 3303 | atomic_read(&mddev->max_corr_read_errors)); | ||
| 3304 | } | ||
| 3305 | |||
| 3306 | static ssize_t | ||
| 3307 | max_corrected_read_errors_store(mddev_t *mddev, const char *buf, size_t len) | ||
| 3308 | { | ||
| 3309 | char *e; | ||
| 3310 | unsigned long n = simple_strtoul(buf, &e, 10); | ||
| 3311 | |||
| 3312 | if (*buf && (*e == 0 || *e == '\n')) { | ||
| 3313 | atomic_set(&mddev->max_corr_read_errors, n); | ||
| 3314 | return len; | ||
| 3315 | } | ||
| 3316 | return -EINVAL; | ||
| 3317 | } | ||
| 3318 | |||
| 3319 | static struct md_sysfs_entry max_corr_read_errors = | ||
| 3320 | __ATTR(max_read_errors, S_IRUGO|S_IWUSR, max_corrected_read_errors_show, | ||
| 3321 | max_corrected_read_errors_store); | ||
| 3322 | |||
| 3323 | static ssize_t | ||
| 3293 | null_show(mddev_t *mddev, char *page) | 3324 | null_show(mddev_t *mddev, char *page) |
| 3294 | { | 3325 | { |
| 3295 | return -EINVAL; | 3326 | return -EINVAL; |
| @@ -3914,6 +3945,7 @@ static struct attribute *md_default_attrs[] = { | |||
| 3914 | &md_array_state.attr, | 3945 | &md_array_state.attr, |
| 3915 | &md_reshape_position.attr, | 3946 | &md_reshape_position.attr, |
| 3916 | &md_array_size.attr, | 3947 | &md_array_size.attr, |
| 3948 | &max_corr_read_errors.attr, | ||
| 3917 | NULL, | 3949 | NULL, |
| 3918 | }; | 3950 | }; |
| 3919 | 3951 | ||
| @@ -4333,6 +4365,8 @@ static int do_md_run(mddev_t * mddev) | |||
| 4333 | mddev->ro = 0; | 4365 | mddev->ro = 0; |
| 4334 | 4366 | ||
| 4335 | atomic_set(&mddev->writes_pending,0); | 4367 | atomic_set(&mddev->writes_pending,0); |
| 4368 | atomic_set(&mddev->max_corr_read_errors, | ||
| 4369 | MD_DEFAULT_MAX_CORRECTED_READ_ERRORS); | ||
| 4336 | mddev->safemode = 0; | 4370 | mddev->safemode = 0; |
| 4337 | mddev->safemode_timer.function = md_safemode_timeout; | 4371 | mddev->safemode_timer.function = md_safemode_timeout; |
| 4338 | mddev->safemode_timer.data = (unsigned long) mddev; | 4372 | mddev->safemode_timer.data = (unsigned long) mddev; |
diff --git a/drivers/md/md.h b/drivers/md/md.h index d9138885b87f..8e4c75c00d46 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h | |||
| @@ -97,6 +97,9 @@ struct mdk_rdev_s | |||
| 97 | atomic_t read_errors; /* number of consecutive read errors that | 97 | atomic_t read_errors; /* number of consecutive read errors that |
| 98 | * we have tried to ignore. | 98 | * we have tried to ignore. |
| 99 | */ | 99 | */ |
| 100 | struct timespec last_read_error; /* monotonic time at which our | ||
| 101 | * last read error occurred | ||
| 102 | */ | ||
| 100 | atomic_t corrected_errors; /* number of corrected read errors, | 103 | atomic_t corrected_errors; /* number of corrected read errors, |
| 101 | * for reporting to userspace and storing | 104 | * for reporting to userspace and storing |
| 102 | * in superblock. | 105 | * in superblock. |
| @@ -299,6 +302,7 @@ struct mddev_s | |||
| 299 | int external; | 302 | int external; |
| 300 | } bitmap_info; | 303 | } bitmap_info; |
| 301 | 304 | ||
| 305 | atomic_t max_corr_read_errors; /* max read retries */ | ||
| 302 | struct list_head all_mddevs; | 306 | struct list_head all_mddevs; |
| 303 | 307 | ||
| 304 | /* Generic barrier handling. | 308 | /* Generic barrier handling. |
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 670449f7411f..5c71a462c120 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
| @@ -1432,6 +1432,43 @@ static void recovery_request_write(mddev_t *mddev, r10bio_t *r10_bio) | |||
| 1432 | 1432 | ||
| 1433 | 1433 | ||
| 1434 | /* | 1434 | /* |
| 1435 | * Used by fix_read_error() to decay the per rdev read_errors. | ||
| 1436 | * We halve the read error count for every hour that has elapsed | ||
| 1437 | * since the last recorded read error. | ||
| 1438 | * | ||
| 1439 | */ | ||
| 1440 | static void check_decay_read_errors(mddev_t *mddev, mdk_rdev_t *rdev) | ||
| 1441 | { | ||
| 1442 | struct timespec cur_time_mon; | ||
| 1443 | unsigned long hours_since_last; | ||
| 1444 | unsigned int read_errors = atomic_read(&rdev->read_errors); | ||
| 1445 | |||
| 1446 | ktime_get_ts(&cur_time_mon); | ||
| 1447 | |||
| 1448 | if (rdev->last_read_error.tv_sec == 0 && | ||
| 1449 | rdev->last_read_error.tv_nsec == 0) { | ||
| 1450 | /* first time we've seen a read error */ | ||
| 1451 | rdev->last_read_error = cur_time_mon; | ||
| 1452 | return; | ||
| 1453 | } | ||
| 1454 | |||
| 1455 | hours_since_last = (cur_time_mon.tv_sec - | ||
| 1456 | rdev->last_read_error.tv_sec) / 3600; | ||
| 1457 | |||
| 1458 | rdev->last_read_error = cur_time_mon; | ||
| 1459 | |||
| 1460 | /* | ||
| 1461 | * if hours_since_last is > the number of bits in read_errors | ||
| 1462 | * just set read errors to 0. We do this to avoid | ||
| 1463 | * overflowing the shift of read_errors by hours_since_last. | ||
| 1464 | */ | ||
| 1465 | if (hours_since_last >= 8 * sizeof(read_errors)) | ||
| 1466 | atomic_set(&rdev->read_errors, 0); | ||
| 1467 | else | ||
| 1468 | atomic_set(&rdev->read_errors, read_errors >> hours_since_last); | ||
| 1469 | } | ||
| 1470 | |||
| 1471 | /* | ||
| 1435 | * This is a kernel thread which: | 1472 | * This is a kernel thread which: |
| 1436 | * | 1473 | * |
| 1437 | * 1. Retries failed read operations on working mirrors. | 1474 | * 1. Retries failed read operations on working mirrors. |
| @@ -1444,6 +1481,43 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) | |||
| 1444 | int sect = 0; /* Offset from r10_bio->sector */ | 1481 | int sect = 0; /* Offset from r10_bio->sector */ |
| 1445 | int sectors = r10_bio->sectors; | 1482 | int sectors = r10_bio->sectors; |
| 1446 | mdk_rdev_t*rdev; | 1483 | mdk_rdev_t*rdev; |
| 1484 | int max_read_errors = atomic_read(&mddev->max_corr_read_errors); | ||
| 1485 | |||
| 1486 | rcu_read_lock(); | ||
| 1487 | { | ||
| 1488 | int d = r10_bio->devs[r10_bio->read_slot].devnum; | ||
| 1489 | char b[BDEVNAME_SIZE]; | ||
| 1490 | int cur_read_error_count = 0; | ||
| 1491 | |||
| 1492 | rdev = rcu_dereference(conf->mirrors[d].rdev); | ||
| 1493 | bdevname(rdev->bdev, b); | ||
| 1494 | |||
| 1495 | if (test_bit(Faulty, &rdev->flags)) { | ||
| 1496 | rcu_read_unlock(); | ||
| 1497 | /* drive has already been failed, just ignore any | ||
| 1498 | more fix_read_error() attempts */ | ||
| 1499 | return; | ||
| 1500 | } | ||
| 1501 | |||
| 1502 | check_decay_read_errors(mddev, rdev); | ||
| 1503 | atomic_inc(&rdev->read_errors); | ||
| 1504 | cur_read_error_count = atomic_read(&rdev->read_errors); | ||
| 1505 | if (cur_read_error_count > max_read_errors) { | ||
| 1506 | rcu_read_unlock(); | ||
| 1507 | printk(KERN_NOTICE | ||
| 1508 | "raid10: %s: Raid device exceeded " | ||
| 1509 | "read_error threshold " | ||
| 1510 | "[cur %d:max %d]\n", | ||
| 1511 | b, cur_read_error_count, max_read_errors); | ||
| 1512 | printk(KERN_NOTICE | ||
| 1513 | "raid10: %s: Failing raid " | ||
| 1514 | "device\n", b); | ||
| 1515 | md_error(mddev, conf->mirrors[d].rdev); | ||
| 1516 | return; | ||
| 1517 | } | ||
| 1518 | } | ||
| 1519 | rcu_read_unlock(); | ||
| 1520 | |||
| 1447 | while(sectors) { | 1521 | while(sectors) { |
| 1448 | int s = sectors; | 1522 | int s = sectors; |
| 1449 | int sl = r10_bio->read_slot; | 1523 | int sl = r10_bio->read_slot; |
