Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r--	drivers/md/raid5.c	67
1 file changed, 47 insertions, 20 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index d26767246d2..04348d76bb3 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -196,12 +196,14 @@ static void __release_stripe(struct r5conf *conf, struct stripe_head *sh)
 		BUG_ON(!list_empty(&sh->lru));
 		BUG_ON(atomic_read(&conf->active_stripes)==0);
 		if (test_bit(STRIPE_HANDLE, &sh->state)) {
-			if (test_bit(STRIPE_DELAYED, &sh->state))
+			if (test_bit(STRIPE_DELAYED, &sh->state) &&
+			    !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
 				list_add_tail(&sh->lru, &conf->delayed_list);
 			else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
 				   sh->bm_seq - conf->seq_write > 0)
 				list_add_tail(&sh->lru, &conf->bitmap_list);
 			else {
+				clear_bit(STRIPE_DELAYED, &sh->state);
 				clear_bit(STRIPE_BIT_DELAY, &sh->state);
 				list_add_tail(&sh->lru, &conf->handle_list);
 			}
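
The rework above parks a stripe on delayed_list only while no preread is active, and clears STRIPE_DELAYED when the stripe is queued to handle_list so a stale flag cannot delay it again later. Below is a minimal userspace sketch of that dispatch decision; the flag names are borrowed from the kernel, but the dispatch() helper, the list enum and the seq_gap parameter are illustrative stand-ins, not kernel API.

#include <stdio.h>

#define STRIPE_HANDLE		(1 << 0)
#define STRIPE_DELAYED		(1 << 1)
#define STRIPE_PREREAD_ACTIVE	(1 << 2)
#define STRIPE_BIT_DELAY	(1 << 3)

enum list_id { LIST_NONE, LIST_DELAYED, LIST_BITMAP, LIST_HANDLE };

/* Decide which conf list a released stripe should join. */
static enum list_id dispatch(unsigned long *state, long seq_gap)
{
	if (!(*state & STRIPE_HANDLE))
		return LIST_NONE;
	if ((*state & STRIPE_DELAYED) && !(*state & STRIPE_PREREAD_ACTIVE))
		return LIST_DELAYED;
	if ((*state & STRIPE_BIT_DELAY) && seq_gap > 0)
		return LIST_BITMAP;
	/* going to handle_list: drop both delay flags */
	*state &= ~(STRIPE_DELAYED | STRIPE_BIT_DELAY);
	return LIST_HANDLE;
}

int main(void)
{
	unsigned long st = STRIPE_HANDLE | STRIPE_DELAYED | STRIPE_PREREAD_ACTIVE;

	printf("%d\n", dispatch(&st, 0));	/* LIST_HANDLE: preread overrides delay */
	st = STRIPE_HANDLE | STRIPE_DELAYED;
	printf("%d\n", dispatch(&st, 0));	/* LIST_DELAYED: no preread in flight */
	return 0;
}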
@@ -606,6 +608,12 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 				 * a chance*/
 				md_check_recovery(conf->mddev);
 			}
+			/*
+			 * Because md_wait_for_blocked_rdev
+			 * will dec nr_pending, we must
+			 * increment it first.
+			 */
+			atomic_inc(&rdev->nr_pending);
 			md_wait_for_blocked_rdev(rdev, conf->mddev);
 		} else {
 			/* Acknowledged bad block - skip the write */
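
The comment added above is the whole story: md_wait_for_blocked_rdev() consumes one nr_pending reference when it returns, so the caller has to take a reference first or the count is dropped twice. A stand-alone sketch of that pairing, with C11 atomics standing in for the kernel's atomic_t (rdev_stub and wait_for_blocked() are hypothetical models, not md API):

#include <assert.h>
#include <stdatomic.h>

struct rdev_stub { atomic_int nr_pending; };

/* Models md_wait_for_blocked_rdev(): sleeps, then drops one reference. */
static void wait_for_blocked(struct rdev_stub *rdev)
{
	/* ... would block here until the device is unblocked ... */
	atomic_fetch_sub(&rdev->nr_pending, 1);
}

int main(void)
{
	struct rdev_stub rdev = { .nr_pending = 1 };	/* caller's existing ref */

	atomic_fetch_add(&rdev.nr_pending, 1);	/* the added atomic_inc() */
	wait_for_blocked(&rdev);		/* pairs with the drop inside */
	assert(atomic_load(&rdev.nr_pending) == 1);	/* balanced again */
	return 0;
}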
@@ -1737,6 +1745,7 @@ static void raid5_end_read_request(struct bio * bi, int error)
 	} else {
 		const char *bdn = bdevname(rdev->bdev, b);
 		int retry = 0;
+		int set_bad = 0;
 
 		clear_bit(R5_UPTODATE, &sh->dev[i].flags);
 		atomic_inc(&rdev->read_errors);
@@ -1748,7 +1757,8 @@ static void raid5_end_read_request(struct bio * bi, int error)
 				mdname(conf->mddev),
 				(unsigned long long)s,
 				bdn);
-		else if (conf->mddev->degraded >= conf->max_degraded)
+		else if (conf->mddev->degraded >= conf->max_degraded) {
+			set_bad = 1;
 			printk_ratelimited(
 				KERN_WARNING
 				"md/raid:%s: read error not correctable "
@@ -1756,8 +1766,9 @@ static void raid5_end_read_request(struct bio * bi, int error)
 				mdname(conf->mddev),
 				(unsigned long long)s,
 				bdn);
-		else if (test_bit(R5_ReWrite, &sh->dev[i].flags))
+		} else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) {
 			/* Oh, no!!! */
+			set_bad = 1;
 			printk_ratelimited(
 				KERN_WARNING
 				"md/raid:%s: read error NOT corrected!! "
@@ -1765,7 +1776,7 @@ static void raid5_end_read_request(struct bio * bi, int error)
 				mdname(conf->mddev),
 				(unsigned long long)s,
 				bdn);
-		else if (atomic_read(&rdev->read_errors)
+		} else if (atomic_read(&rdev->read_errors)
 			 > conf->max_nr_stripes)
 			printk(KERN_WARNING
 			       "md/raid:%s: Too many read errors, failing device %s.\n",
@@ -1777,7 +1788,11 @@ static void raid5_end_read_request(struct bio * bi, int error)
 		else {
 			clear_bit(R5_ReadError, &sh->dev[i].flags);
 			clear_bit(R5_ReWrite, &sh->dev[i].flags);
-			md_error(conf->mddev, rdev);
+			if (!(set_bad
+			      && test_bit(In_sync, &rdev->flags)
+			      && rdev_set_badblocks(
+				      rdev, sh->sector, STRIPE_SECTORS, 0)))
+				md_error(conf->mddev, rdev);
 		}
 	}
 	rdev_dec_pending(rdev, conf->mddev);
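
Combined, the set_bad changes adjust the failure policy: when the read error is uncorrectable (array already degraded, or the rewrite attempt itself failed) and the device is still In_sync, raid5_end_read_request() now records a bad block with rdev_set_badblocks() and only falls back to failing the device via md_error() if that recording fails. A compilable sketch of just that decision; the two stubs are stand-ins for the md helpers, and the always-succeeding badblock store is an assumption for the demo:

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for rdev_set_badblocks(); nonzero means recorded. */
static bool record_badblock(unsigned long long sector)
{
	printf("recorded bad block at sector %llu\n", sector);
	return true;
}

/* Stand-in for md_error(). */
static void fail_device(void)
{
	puts("md_error: failing the device");
}

static void handle_uncorrectable(bool set_bad, bool in_sync,
				 unsigned long long sector)
{
	if (!(set_bad && in_sync && record_badblock(sector)))
		fail_device();
}

int main(void)
{
	handle_uncorrectable(true, true, 12345);	/* bad block recorded, device survives */
	handle_uncorrectable(false, true, 12345);	/* no set_bad: device is failed */
	return 0;
}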
@@ -3582,8 +3597,18 @@ static void handle_stripe(struct stripe_head *sh)
 
 finish:
 	/* wait for this device to become unblocked */
-	if (conf->mddev->external && unlikely(s.blocked_rdev))
-		md_wait_for_blocked_rdev(s.blocked_rdev, conf->mddev);
+	if (unlikely(s.blocked_rdev)) {
+		if (conf->mddev->external)
+			md_wait_for_blocked_rdev(s.blocked_rdev,
+						 conf->mddev);
+		else
+			/* Internal metadata will immediately
+			 * be written by raid5d, so we don't
+			 * need to wait here.
+			 */
+			rdev_dec_pending(s.blocked_rdev,
+					 conf->mddev);
+	}
 
 	if (s.handle_bad_blocks)
 		for (i = disks; i--; ) {
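
The new branch separates the two metadata models: external metadata needs user space to notice the Blocked rdev and clear it, so the stripe must wait, while internal metadata is written by raid5d itself almost immediately, so the stripe can simply drop its reference and move on. A toy model of that branch; rdev_stub and both helpers are illustrative stand-ins for the md functions named in the diff:

#include <stdbool.h>
#include <stdio.h>

struct rdev_stub { int nr_pending; };

/* Models md_wait_for_blocked_rdev(): sleeps, then drops the reference. */
static void wait_for_blocked(struct rdev_stub *r)
{
	puts("waiting for user space to unblock the device");
	r->nr_pending--;
}

/* Models rdev_dec_pending(): just drops the reference. */
static void dec_pending(struct rdev_stub *r)
{
	r->nr_pending--;
}

static void finish_blocked(struct rdev_stub *blocked, bool external)
{
	if (!blocked)
		return;
	if (external)
		wait_for_blocked(blocked);
	else
		/* internal metadata: raid5d writes it right away */
		dec_pending(blocked);
}

int main(void)
{
	struct rdev_stub r = { .nr_pending = 1 };

	finish_blocked(&r, false);	/* internal: no wait, ref dropped */
	printf("nr_pending = %d\n", r.nr_pending);
	return 0;
}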
@@ -3881,8 +3906,6 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
 		raid_bio->bi_next = (void*)rdev;
 		align_bi->bi_bdev =  rdev->bdev;
 		align_bi->bi_flags &= ~(1 << BIO_SEG_VALID);
-		/* No reshape active, so we can trust rdev->data_offset */
-		align_bi->bi_sector += rdev->data_offset;
 
 		if (!bio_fits_rdev(align_bi) ||
 		    is_badblock(rdev, align_bi->bi_sector, align_bi->bi_size>>9,
@@ -3893,6 +3916,9 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
 			return 0;
 		}
 
+		/* No reshape active, so we can trust rdev->data_offset */
+		align_bi->bi_sector += rdev->data_offset;
+
 		spin_lock_irq(&conf->device_lock);
 		wait_event_lock_irq(conf->wait_for_stripe,
 				    conf->quiesce == 0,
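
The reason the data_offset addition moves below the checks: is_badblock() applies rdev->data_offset to the sector itself, so adding the offset to bi_sector beforehand would shift the badblock lookup twice. A small ordering sketch; the one-range badblock store and dev_stub are simplified stand-ins for the kernel structures:

#include <assert.h>
#include <stdbool.h>

typedef unsigned long long sector_t;

struct dev_stub {
	sector_t data_offset;
	sector_t bad_start, bad_len;	/* one bad range, device-absolute */
};

/* Models is_badblock(): note that it applies data_offset itself. */
static bool is_bad(const struct dev_stub *d, sector_t s, sector_t n)
{
	sector_t dev_sector = s + d->data_offset;

	return dev_sector < d->bad_start + d->bad_len &&
	       dev_sector + n > d->bad_start;
}

int main(void)
{
	struct dev_stub d = { .data_offset = 2048, .bad_start = 2100, .bad_len = 8 };
	sector_t sector = 55;	/* array-relative sector of the bio */

	/* New order: check first (offset applied inside the check)... */
	assert(is_bad(&d, sector, 8));
	/* ...then add data_offset exactly once for submission. */
	sector += d.data_offset;

	/* The old order would have offset the check twice and missed the range: */
	assert(!is_bad(&d, sector, 8));
	return 0;
}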
@@ -3971,7 +3997,6 @@ static void make_request(struct mddev *mddev, struct bio * bi)
 	struct stripe_head *sh;
 	const int rw = bio_data_dir(bi);
 	int remaining;
-	int plugged;
 
 	if (unlikely(bi->bi_rw & REQ_FLUSH)) {
 		md_flush_request(mddev, bi);
@@ -3990,7 +4015,6 @@ static void make_request(struct mddev *mddev, struct bio * bi)
 	bi->bi_next = NULL;
 	bi->bi_phys_segments = 1;	/* over-loaded to count active stripes */
 
-	plugged = mddev_check_plugged(mddev);
 	for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
 		DEFINE_WAIT(w);
 		int previous;
@@ -4092,6 +4116,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
 			if ((bi->bi_rw & REQ_SYNC) &&
 			    !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
 				atomic_inc(&conf->preread_active_stripes);
+			mddev_check_plugged(mddev);
 			release_stripe(sh);
 		} else {
 			/* cannot get stripe for read-ahead, just give-up */
@@ -4099,10 +4124,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
 			finish_wait(&conf->wait_for_overlap, &w);
 			break;
 		}
-
 	}
-	if (!plugged)
-		md_wakeup_thread(mddev->thread);
 
 	spin_lock_irq(&conf->device_lock);
 	remaining = raid5_dec_bi_phys_segments(bi);
@@ -4823,6 +4845,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
 	int raid_disk, memory, max_disks;
 	struct md_rdev *rdev;
 	struct disk_info *disk;
+	char pers_name[6];
 
 	if (mddev->new_level != 5
 	    && mddev->new_level != 4
@@ -4946,7 +4969,8 @@ static struct r5conf *setup_conf(struct mddev *mddev)
 	printk(KERN_INFO "md/raid:%s: allocated %dkB\n",
 	       mdname(mddev), memory);
 
-	conf->thread = md_register_thread(raid5d, mddev, NULL);
+	sprintf(pers_name, "raid%d", mddev->new_level);
+	conf->thread = md_register_thread(raid5d, mddev, pers_name);
 	if (!conf->thread) {
 		printk(KERN_ERR
 		       "md/raid:%s: couldn't allocate thread.\n",
@@ -5465,10 +5489,9 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 	if (rdev->saved_raid_disk >= 0 &&
 	    rdev->saved_raid_disk >= first &&
 	    conf->disks[rdev->saved_raid_disk].rdev == NULL)
-		disk = rdev->saved_raid_disk;
-	else
-		disk = first;
-	for ( ; disk <= last ; disk++) {
+		first = rdev->saved_raid_disk;
+
+	for (disk = first; disk <= last; disk++) {
 		p = conf->disks + disk;
 		if (p->rdev == NULL) {
 			clear_bit(In_sync, &rdev->flags);
@@ -5477,8 +5500,11 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 			if (rdev->saved_raid_disk != disk)
 				conf->fullsync = 1;
 			rcu_assign_pointer(p->rdev, rdev);
-			break;
+			goto out;
 		}
+	}
+	for (disk = first; disk <= last; disk++) {
+		p = conf->disks + disk;
 		if (test_bit(WantReplacement, &p->rdev->flags) &&
 		    p->replacement == NULL) {
 			clear_bit(In_sync, &rdev->flags);
@@ -5490,6 +5516,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 			break;
 		}
 	}
+out:
 	print_raid5_conf(conf);
 	return err;
 }
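
The restructured search is now two full passes: every slot in first..last is offered as an empty position (with first raised to the disk's remembered slot when that slot is free) before any slot is considered for a replacement device, and the goto out skips the replacement pass entirely once a slot is claimed. Under the old single loop, a disk could become a replacement for slot N even though slot N+1 was still empty. A userspace sketch of that priority; the slot array and flags are illustrative, not the kernel structures:

#include <stdbool.h>
#include <stdio.h>

#define NDISKS 4

struct slot { bool occupied; bool want_replacement; bool has_replacement; };

static int add_disk(struct slot *s, int first, int last, int saved)
{
	int disk;

	if (saved >= first && saved <= last && !s[saved].occupied)
		first = saved;	/* prefer the disk's old slot */

	for (disk = first; disk <= last; disk++)	/* pass 1: empty slots */
		if (!s[disk].occupied) {
			s[disk].occupied = true;
			return disk;
		}
	for (disk = first; disk <= last; disk++)	/* pass 2: replacements */
		if (s[disk].want_replacement && !s[disk].has_replacement) {
			s[disk].has_replacement = true;
			return disk;
		}
	return -1;	/* no position available */
}

int main(void)
{
	struct slot s[NDISKS] = {
		{ true,  false, false },
		{ true,  true,  false },	/* wants a replacement */
		{ false, false, false },	/* empty */
		{ true,  false, false },
	};

	/* empty slot 2 wins even though slot 1 wants a replacement */
	printf("placed at slot %d\n", add_disk(s, 0, NDISKS - 1, 2));
	return 0;
}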