cgit diff view — md/raid10: track and decay correctable read errors, fail devices that exceed a configurable threshold
(viewer options: context / whitespace / mode — defaults)
-rw-r--r--drivers/md/md.c34
-rw-r--r--drivers/md/md.h4
-rw-r--r--drivers/md/raid10.c74
3 files changed, 112 insertions, 0 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 859edbf8c9b0..f1b905a20133 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -68,6 +68,12 @@ static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
68#define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); } 68#define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
69 69
70/* 70/*
71 * Default number of read corrections we'll attempt on an rdev
72 * before ejecting it from the array. We divide the read error
73 * count by 2 for every hour elapsed between read errors.
74 */
75#define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20
76/*
71 * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit' 77 * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit'
72 * is 1000 KB/sec, so the extra system load does not show up that much. 78 * is 1000 KB/sec, so the extra system load does not show up that much.
73 * Increase it if you want to have more _guaranteed_ speed. Note that 79 * Increase it if you want to have more _guaranteed_ speed. Note that
@@ -2653,6 +2659,8 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi
2653 rdev->flags = 0; 2659 rdev->flags = 0;
2654 rdev->data_offset = 0; 2660 rdev->data_offset = 0;
2655 rdev->sb_events = 0; 2661 rdev->sb_events = 0;
2662 rdev->last_read_error.tv_sec = 0;
2663 rdev->last_read_error.tv_nsec = 0;
2656 atomic_set(&rdev->nr_pending, 0); 2664 atomic_set(&rdev->nr_pending, 0);
2657 atomic_set(&rdev->read_errors, 0); 2665 atomic_set(&rdev->read_errors, 0);
2658 atomic_set(&rdev->corrected_errors, 0); 2666 atomic_set(&rdev->corrected_errors, 0);
@@ -3290,6 +3298,29 @@ static struct md_sysfs_entry md_array_state =
3290__ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store); 3298__ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
3291 3299
3292static ssize_t 3300static ssize_t
3301max_corrected_read_errors_show(mddev_t *mddev, char *page) {
3302 return sprintf(page, "%d\n",
3303 atomic_read(&mddev->max_corr_read_errors));
3304}
3305
3306static ssize_t
3307max_corrected_read_errors_store(mddev_t *mddev, const char *buf, size_t len)
3308{
3309 char *e;
3310 unsigned long n = simple_strtoul(buf, &e, 10);
3311
3312 if (*buf && (*e == 0 || *e == '\n')) {
3313 atomic_set(&mddev->max_corr_read_errors, n);
3314 return len;
3315 }
3316 return -EINVAL;
3317}
3318
3319static struct md_sysfs_entry max_corr_read_errors =
3320__ATTR(max_read_errors, S_IRUGO|S_IWUSR, max_corrected_read_errors_show,
3321 max_corrected_read_errors_store);
3322
3323static ssize_t
3293null_show(mddev_t *mddev, char *page) 3324null_show(mddev_t *mddev, char *page)
3294{ 3325{
3295 return -EINVAL; 3326 return -EINVAL;
@@ -3914,6 +3945,7 @@ static struct attribute *md_default_attrs[] = {
3914 &md_array_state.attr, 3945 &md_array_state.attr,
3915 &md_reshape_position.attr, 3946 &md_reshape_position.attr,
3916 &md_array_size.attr, 3947 &md_array_size.attr,
3948 &max_corr_read_errors.attr,
3917 NULL, 3949 NULL,
3918}; 3950};
3919 3951
@@ -4333,6 +4365,8 @@ static int do_md_run(mddev_t * mddev)
4333 mddev->ro = 0; 4365 mddev->ro = 0;
4334 4366
4335 atomic_set(&mddev->writes_pending,0); 4367 atomic_set(&mddev->writes_pending,0);
4368 atomic_set(&mddev->max_corr_read_errors,
4369 MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
4336 mddev->safemode = 0; 4370 mddev->safemode = 0;
4337 mddev->safemode_timer.function = md_safemode_timeout; 4371 mddev->safemode_timer.function = md_safemode_timeout;
4338 mddev->safemode_timer.data = (unsigned long) mddev; 4372 mddev->safemode_timer.data = (unsigned long) mddev;
diff --git a/drivers/md/md.h b/drivers/md/md.h
index d9138885b87f..8e4c75c00d46 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -97,6 +97,9 @@ struct mdk_rdev_s
97 atomic_t read_errors; /* number of consecutive read errors that 97 atomic_t read_errors; /* number of consecutive read errors that
98 * we have tried to ignore. 98 * we have tried to ignore.
99 */ 99 */
100 struct timespec last_read_error; /* monotonic time since our
101 * last read error
102 */
100 atomic_t corrected_errors; /* number of corrected read errors, 103 atomic_t corrected_errors; /* number of corrected read errors,
101 * for reporting to userspace and storing 104 * for reporting to userspace and storing
102 * in superblock. 105 * in superblock.
@@ -299,6 +302,7 @@ struct mddev_s
299 int external; 302 int external;
300 } bitmap_info; 303 } bitmap_info;
301 304
305 atomic_t max_corr_read_errors; /* max read retries */
302 struct list_head all_mddevs; 306 struct list_head all_mddevs;
303 307
304 /* Generic barrier handling. 308 /* Generic barrier handling.
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 670449f7411f..5c71a462c120 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1432,6 +1432,43 @@ static void recovery_request_write(mddev_t *mddev, r10bio_t *r10_bio)
1432 1432
1433 1433
1434/* 1434/*
1435 * Used by fix_read_error() to decay the per rdev read_errors.
1436 * We halve the read error count for every hour that has elapsed
1437 * since the last recorded read error.
1438 *
1439 */
1440static void check_decay_read_errors(mddev_t *mddev, mdk_rdev_t *rdev)
1441{
1442 struct timespec cur_time_mon;
1443 unsigned long hours_since_last;
1444 unsigned int read_errors = atomic_read(&rdev->read_errors);
1445
1446 ktime_get_ts(&cur_time_mon);
1447
1448 if (rdev->last_read_error.tv_sec == 0 &&
1449 rdev->last_read_error.tv_nsec == 0) {
1450 /* first time we've seen a read error */
1451 rdev->last_read_error = cur_time_mon;
1452 return;
1453 }
1454
1455 hours_since_last = (cur_time_mon.tv_sec -
1456 rdev->last_read_error.tv_sec) / 3600;
1457
1458 rdev->last_read_error = cur_time_mon;
1459
1460 /*
1461 * if hours_since_last is > the number of bits in read_errors
1462 * just set read errors to 0. We do this to avoid
1463 * overflowing the shift of read_errors by hours_since_last.
1464 */
1465 if (hours_since_last >= 8 * sizeof(read_errors))
1466 atomic_set(&rdev->read_errors, 0);
1467 else
1468 atomic_set(&rdev->read_errors, read_errors >> hours_since_last);
1469}
1470
1471/*
1435 * This is a kernel thread which: 1472 * This is a kernel thread which:
1436 * 1473 *
1437 * 1. Retries failed read operations on working mirrors. 1474 * 1. Retries failed read operations on working mirrors.
@@ -1444,6 +1481,43 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
1444 int sect = 0; /* Offset from r10_bio->sector */ 1481 int sect = 0; /* Offset from r10_bio->sector */
1445 int sectors = r10_bio->sectors; 1482 int sectors = r10_bio->sectors;
1446 mdk_rdev_t*rdev; 1483 mdk_rdev_t*rdev;
1484 int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
1485
1486 rcu_read_lock();
1487 {
1488 int d = r10_bio->devs[r10_bio->read_slot].devnum;
1489 char b[BDEVNAME_SIZE];
1490 int cur_read_error_count = 0;
1491
1492 rdev = rcu_dereference(conf->mirrors[d].rdev);
1493 bdevname(rdev->bdev, b);
1494
1495 if (test_bit(Faulty, &rdev->flags)) {
1496 rcu_read_unlock();
1497 /* drive has already been failed, just ignore any
1498 more fix_read_error() attempts */
1499 return;
1500 }
1501
1502 check_decay_read_errors(mddev, rdev);
1503 atomic_inc(&rdev->read_errors);
1504 cur_read_error_count = atomic_read(&rdev->read_errors);
1505 if (cur_read_error_count > max_read_errors) {
1506 rcu_read_unlock();
1507 printk(KERN_NOTICE
1508 "raid10: %s: Raid device exceeded "
1509 "read_error threshold "
1510 "[cur %d:max %d]\n",
1511 b, cur_read_error_count, max_read_errors);
1512 printk(KERN_NOTICE
1513 "raid10: %s: Failing raid "
1514 "device\n", b);
1515 md_error(mddev, conf->mirrors[d].rdev);
1516 return;
1517 }
1518 }
1519 rcu_read_unlock();
1520
1447 while(sectors) { 1521 while(sectors) {
1448 int s = sectors; 1522 int s = sectors;
1449 int sl = r10_bio->read_slot; 1523 int sl = r10_bio->read_slot;