diff options
author | majianpeng <majianpeng@gmail.com> | 2012-07-03 01:57:02 -0400 |
---|---|---|
committer | NeilBrown <neilb@suse.de> | 2012-07-03 01:57:02 -0400 |
commit | 2e8ac30312973dd20e6807365349ecb1c7e0ea45 (patch) | |
tree | 23c672053c6ff1cf92119e7a18d2f5b83dbab5f2 | |
parent | 0232605d987d8230b254aa139805bbb56a7ca30c (diff) |
md/raid456: When read error cannot be recovered, record bad block
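A read error on a member device cannot be repaired in place if:
- the array is already degraded to its limit, so there is no redundancy
  left from which to reconstruct the data, or
- the over-write fails, i.e. the block still returns a read error after
  it has been rewritten.
In these cases we currently eject the device from the array. Instead we
should record a bad block on the device if possible, and only eject the
device when the bad block cannot be recorded.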
Signed-off-by: majianpeng <majianpeng@gmail.com>
Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r-- | drivers/md/raid5.c | 15 |
1 files changed, 11 insertions, 4 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index a5135e595866..51169ecd7787 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1743,6 +1743,7 @@ static void raid5_end_read_request(struct bio * bi, int error) | |||
1743 | } else { | 1743 | } else { |
1744 | const char *bdn = bdevname(rdev->bdev, b); | 1744 | const char *bdn = bdevname(rdev->bdev, b); |
1745 | int retry = 0; | 1745 | int retry = 0; |
1746 | int set_bad = 0; | ||
1746 | 1747 | ||
1747 | clear_bit(R5_UPTODATE, &sh->dev[i].flags); | 1748 | clear_bit(R5_UPTODATE, &sh->dev[i].flags); |
1748 | atomic_inc(&rdev->read_errors); | 1749 | atomic_inc(&rdev->read_errors); |
@@ -1754,7 +1755,8 @@ static void raid5_end_read_request(struct bio * bi, int error) | |||
1754 | mdname(conf->mddev), | 1755 | mdname(conf->mddev), |
1755 | (unsigned long long)s, | 1756 | (unsigned long long)s, |
1756 | bdn); | 1757 | bdn); |
1757 | else if (conf->mddev->degraded >= conf->max_degraded) | 1758 | else if (conf->mddev->degraded >= conf->max_degraded) { |
1759 | set_bad = 1; | ||
1758 | printk_ratelimited( | 1760 | printk_ratelimited( |
1759 | KERN_WARNING | 1761 | KERN_WARNING |
1760 | "md/raid:%s: read error not correctable " | 1762 | "md/raid:%s: read error not correctable " |
@@ -1762,8 +1764,9 @@ static void raid5_end_read_request(struct bio * bi, int error) | |||
1762 | mdname(conf->mddev), | 1764 | mdname(conf->mddev), |
1763 | (unsigned long long)s, | 1765 | (unsigned long long)s, |
1764 | bdn); | 1766 | bdn); |
1765 | else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) | 1767 | } else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) { |
1766 | /* Oh, no!!! */ | 1768 | /* Oh, no!!! */ |
1769 | set_bad = 1; | ||
1767 | printk_ratelimited( | 1770 | printk_ratelimited( |
1768 | KERN_WARNING | 1771 | KERN_WARNING |
1769 | "md/raid:%s: read error NOT corrected!! " | 1772 | "md/raid:%s: read error NOT corrected!! " |
@@ -1771,7 +1774,7 @@ static void raid5_end_read_request(struct bio * bi, int error) | |||
1771 | mdname(conf->mddev), | 1774 | mdname(conf->mddev), |
1772 | (unsigned long long)s, | 1775 | (unsigned long long)s, |
1773 | bdn); | 1776 | bdn); |
1774 | else if (atomic_read(&rdev->read_errors) | 1777 | } else if (atomic_read(&rdev->read_errors) |
1775 | > conf->max_nr_stripes) | 1778 | > conf->max_nr_stripes) |
1776 | printk(KERN_WARNING | 1779 | printk(KERN_WARNING |
1777 | "md/raid:%s: Too many read errors, failing device %s.\n", | 1780 | "md/raid:%s: Too many read errors, failing device %s.\n", |
@@ -1783,7 +1786,11 @@ static void raid5_end_read_request(struct bio * bi, int error) | |||
1783 | else { | 1786 | else { |
1784 | clear_bit(R5_ReadError, &sh->dev[i].flags); | 1787 | clear_bit(R5_ReadError, &sh->dev[i].flags); |
1785 | clear_bit(R5_ReWrite, &sh->dev[i].flags); | 1788 | clear_bit(R5_ReWrite, &sh->dev[i].flags); |
1786 | md_error(conf->mddev, rdev); | 1789 | if (!(set_bad |
1790 | && test_bit(In_sync, &rdev->flags) | ||
1791 | && rdev_set_badblocks( | ||
1792 | rdev, sh->sector, STRIPE_SECTORS, 0))) | ||
1793 | md_error(conf->mddev, rdev); | ||
1787 | } | 1794 | } |
1788 | } | 1795 | } |
1789 | rdev_dec_pending(rdev, conf->mddev); | 1796 | rdev_dec_pending(rdev, conf->mddev); |