diff options
Diffstat (limited to 'drivers/md/raid5.c')
| -rw-r--r-- | drivers/md/raid5.c | 67 |
1 files changed, 47 insertions, 20 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index d26767246d26..04348d76bb30 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
| @@ -196,12 +196,14 @@ static void __release_stripe(struct r5conf *conf, struct stripe_head *sh) | |||
| 196 | BUG_ON(!list_empty(&sh->lru)); | 196 | BUG_ON(!list_empty(&sh->lru)); |
| 197 | BUG_ON(atomic_read(&conf->active_stripes)==0); | 197 | BUG_ON(atomic_read(&conf->active_stripes)==0); |
| 198 | if (test_bit(STRIPE_HANDLE, &sh->state)) { | 198 | if (test_bit(STRIPE_HANDLE, &sh->state)) { |
| 199 | if (test_bit(STRIPE_DELAYED, &sh->state)) | 199 | if (test_bit(STRIPE_DELAYED, &sh->state) && |
| 200 | !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) | ||
| 200 | list_add_tail(&sh->lru, &conf->delayed_list); | 201 | list_add_tail(&sh->lru, &conf->delayed_list); |
| 201 | else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && | 202 | else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && |
| 202 | sh->bm_seq - conf->seq_write > 0) | 203 | sh->bm_seq - conf->seq_write > 0) |
| 203 | list_add_tail(&sh->lru, &conf->bitmap_list); | 204 | list_add_tail(&sh->lru, &conf->bitmap_list); |
| 204 | else { | 205 | else { |
| 206 | clear_bit(STRIPE_DELAYED, &sh->state); | ||
| 205 | clear_bit(STRIPE_BIT_DELAY, &sh->state); | 207 | clear_bit(STRIPE_BIT_DELAY, &sh->state); |
| 206 | list_add_tail(&sh->lru, &conf->handle_list); | 208 | list_add_tail(&sh->lru, &conf->handle_list); |
| 207 | } | 209 | } |
| @@ -606,6 +608,12 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) | |||
| 606 | * a chance*/ | 608 | * a chance*/ |
| 607 | md_check_recovery(conf->mddev); | 609 | md_check_recovery(conf->mddev); |
| 608 | } | 610 | } |
| 611 | /* | ||
| 612 | * Because md_wait_for_blocked_rdev | ||
| 613 | * will dec nr_pending, we must | ||
| 614 | * increment it first. | ||
| 615 | */ | ||
| 616 | atomic_inc(&rdev->nr_pending); | ||
| 609 | md_wait_for_blocked_rdev(rdev, conf->mddev); | 617 | md_wait_for_blocked_rdev(rdev, conf->mddev); |
| 610 | } else { | 618 | } else { |
| 611 | /* Acknowledged bad block - skip the write */ | 619 | /* Acknowledged bad block - skip the write */ |
| @@ -1737,6 +1745,7 @@ static void raid5_end_read_request(struct bio * bi, int error) | |||
| 1737 | } else { | 1745 | } else { |
| 1738 | const char *bdn = bdevname(rdev->bdev, b); | 1746 | const char *bdn = bdevname(rdev->bdev, b); |
| 1739 | int retry = 0; | 1747 | int retry = 0; |
| 1748 | int set_bad = 0; | ||
| 1740 | 1749 | ||
| 1741 | clear_bit(R5_UPTODATE, &sh->dev[i].flags); | 1750 | clear_bit(R5_UPTODATE, &sh->dev[i].flags); |
| 1742 | atomic_inc(&rdev->read_errors); | 1751 | atomic_inc(&rdev->read_errors); |
| @@ -1748,7 +1757,8 @@ static void raid5_end_read_request(struct bio * bi, int error) | |||
| 1748 | mdname(conf->mddev), | 1757 | mdname(conf->mddev), |
| 1749 | (unsigned long long)s, | 1758 | (unsigned long long)s, |
| 1750 | bdn); | 1759 | bdn); |
| 1751 | else if (conf->mddev->degraded >= conf->max_degraded) | 1760 | else if (conf->mddev->degraded >= conf->max_degraded) { |
| 1761 | set_bad = 1; | ||
| 1752 | printk_ratelimited( | 1762 | printk_ratelimited( |
| 1753 | KERN_WARNING | 1763 | KERN_WARNING |
| 1754 | "md/raid:%s: read error not correctable " | 1764 | "md/raid:%s: read error not correctable " |
| @@ -1756,8 +1766,9 @@ static void raid5_end_read_request(struct bio * bi, int error) | |||
| 1756 | mdname(conf->mddev), | 1766 | mdname(conf->mddev), |
| 1757 | (unsigned long long)s, | 1767 | (unsigned long long)s, |
| 1758 | bdn); | 1768 | bdn); |
| 1759 | else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) | 1769 | } else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) { |
| 1760 | /* Oh, no!!! */ | 1770 | /* Oh, no!!! */ |
| 1771 | set_bad = 1; | ||
| 1761 | printk_ratelimited( | 1772 | printk_ratelimited( |
| 1762 | KERN_WARNING | 1773 | KERN_WARNING |
| 1763 | "md/raid:%s: read error NOT corrected!! " | 1774 | "md/raid:%s: read error NOT corrected!! " |
| @@ -1765,7 +1776,7 @@ static void raid5_end_read_request(struct bio * bi, int error) | |||
| 1765 | mdname(conf->mddev), | 1776 | mdname(conf->mddev), |
| 1766 | (unsigned long long)s, | 1777 | (unsigned long long)s, |
| 1767 | bdn); | 1778 | bdn); |
| 1768 | else if (atomic_read(&rdev->read_errors) | 1779 | } else if (atomic_read(&rdev->read_errors) |
| 1769 | > conf->max_nr_stripes) | 1780 | > conf->max_nr_stripes) |
| 1770 | printk(KERN_WARNING | 1781 | printk(KERN_WARNING |
| 1771 | "md/raid:%s: Too many read errors, failing device %s.\n", | 1782 | "md/raid:%s: Too many read errors, failing device %s.\n", |
| @@ -1777,7 +1788,11 @@ static void raid5_end_read_request(struct bio * bi, int error) | |||
| 1777 | else { | 1788 | else { |
| 1778 | clear_bit(R5_ReadError, &sh->dev[i].flags); | 1789 | clear_bit(R5_ReadError, &sh->dev[i].flags); |
| 1779 | clear_bit(R5_ReWrite, &sh->dev[i].flags); | 1790 | clear_bit(R5_ReWrite, &sh->dev[i].flags); |
| 1780 | md_error(conf->mddev, rdev); | 1791 | if (!(set_bad |
| 1792 | && test_bit(In_sync, &rdev->flags) | ||
| 1793 | && rdev_set_badblocks( | ||
| 1794 | rdev, sh->sector, STRIPE_SECTORS, 0))) | ||
| 1795 | md_error(conf->mddev, rdev); | ||
| 1781 | } | 1796 | } |
| 1782 | } | 1797 | } |
| 1783 | rdev_dec_pending(rdev, conf->mddev); | 1798 | rdev_dec_pending(rdev, conf->mddev); |
| @@ -3582,8 +3597,18 @@ static void handle_stripe(struct stripe_head *sh) | |||
| 3582 | 3597 | ||
| 3583 | finish: | 3598 | finish: |
| 3584 | /* wait for this device to become unblocked */ | 3599 | /* wait for this device to become unblocked */ |
| 3585 | if (conf->mddev->external && unlikely(s.blocked_rdev)) | 3600 | if (unlikely(s.blocked_rdev)) { |
| 3586 | md_wait_for_blocked_rdev(s.blocked_rdev, conf->mddev); | 3601 | if (conf->mddev->external) |
| 3602 | md_wait_for_blocked_rdev(s.blocked_rdev, | ||
| 3603 | conf->mddev); | ||
| 3604 | else | ||
| 3605 | /* Internal metadata will immediately | ||
| 3606 | * be written by raid5d, so we don't | ||
| 3607 | * need to wait here. | ||
| 3608 | */ | ||
| 3609 | rdev_dec_pending(s.blocked_rdev, | ||
| 3610 | conf->mddev); | ||
| 3611 | } | ||
| 3587 | 3612 | ||
| 3588 | if (s.handle_bad_blocks) | 3613 | if (s.handle_bad_blocks) |
| 3589 | for (i = disks; i--; ) { | 3614 | for (i = disks; i--; ) { |
| @@ -3881,8 +3906,6 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio) | |||
| 3881 | raid_bio->bi_next = (void*)rdev; | 3906 | raid_bio->bi_next = (void*)rdev; |
| 3882 | align_bi->bi_bdev = rdev->bdev; | 3907 | align_bi->bi_bdev = rdev->bdev; |
| 3883 | align_bi->bi_flags &= ~(1 << BIO_SEG_VALID); | 3908 | align_bi->bi_flags &= ~(1 << BIO_SEG_VALID); |
| 3884 | /* No reshape active, so we can trust rdev->data_offset */ | ||
| 3885 | align_bi->bi_sector += rdev->data_offset; | ||
| 3886 | 3909 | ||
| 3887 | if (!bio_fits_rdev(align_bi) || | 3910 | if (!bio_fits_rdev(align_bi) || |
| 3888 | is_badblock(rdev, align_bi->bi_sector, align_bi->bi_size>>9, | 3911 | is_badblock(rdev, align_bi->bi_sector, align_bi->bi_size>>9, |
| @@ -3893,6 +3916,9 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio) | |||
| 3893 | return 0; | 3916 | return 0; |
| 3894 | } | 3917 | } |
| 3895 | 3918 | ||
| 3919 | /* No reshape active, so we can trust rdev->data_offset */ | ||
| 3920 | align_bi->bi_sector += rdev->data_offset; | ||
| 3921 | |||
| 3896 | spin_lock_irq(&conf->device_lock); | 3922 | spin_lock_irq(&conf->device_lock); |
| 3897 | wait_event_lock_irq(conf->wait_for_stripe, | 3923 | wait_event_lock_irq(conf->wait_for_stripe, |
| 3898 | conf->quiesce == 0, | 3924 | conf->quiesce == 0, |
| @@ -3971,7 +3997,6 @@ static void make_request(struct mddev *mddev, struct bio * bi) | |||
| 3971 | struct stripe_head *sh; | 3997 | struct stripe_head *sh; |
| 3972 | const int rw = bio_data_dir(bi); | 3998 | const int rw = bio_data_dir(bi); |
| 3973 | int remaining; | 3999 | int remaining; |
| 3974 | int plugged; | ||
| 3975 | 4000 | ||
| 3976 | if (unlikely(bi->bi_rw & REQ_FLUSH)) { | 4001 | if (unlikely(bi->bi_rw & REQ_FLUSH)) { |
| 3977 | md_flush_request(mddev, bi); | 4002 | md_flush_request(mddev, bi); |
| @@ -3990,7 +4015,6 @@ static void make_request(struct mddev *mddev, struct bio * bi) | |||
| 3990 | bi->bi_next = NULL; | 4015 | bi->bi_next = NULL; |
| 3991 | bi->bi_phys_segments = 1; /* over-loaded to count active stripes */ | 4016 | bi->bi_phys_segments = 1; /* over-loaded to count active stripes */ |
| 3992 | 4017 | ||
| 3993 | plugged = mddev_check_plugged(mddev); | ||
| 3994 | for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) { | 4018 | for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) { |
| 3995 | DEFINE_WAIT(w); | 4019 | DEFINE_WAIT(w); |
| 3996 | int previous; | 4020 | int previous; |
| @@ -4092,6 +4116,7 @@ static void make_request(struct mddev *mddev, struct bio * bi) | |||
| 4092 | if ((bi->bi_rw & REQ_SYNC) && | 4116 | if ((bi->bi_rw & REQ_SYNC) && |
| 4093 | !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) | 4117 | !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) |
| 4094 | atomic_inc(&conf->preread_active_stripes); | 4118 | atomic_inc(&conf->preread_active_stripes); |
| 4119 | mddev_check_plugged(mddev); | ||
| 4095 | release_stripe(sh); | 4120 | release_stripe(sh); |
| 4096 | } else { | 4121 | } else { |
| 4097 | /* cannot get stripe for read-ahead, just give-up */ | 4122 | /* cannot get stripe for read-ahead, just give-up */ |
| @@ -4099,10 +4124,7 @@ static void make_request(struct mddev *mddev, struct bio * bi) | |||
| 4099 | finish_wait(&conf->wait_for_overlap, &w); | 4124 | finish_wait(&conf->wait_for_overlap, &w); |
| 4100 | break; | 4125 | break; |
| 4101 | } | 4126 | } |
| 4102 | |||
| 4103 | } | 4127 | } |
| 4104 | if (!plugged) | ||
| 4105 | md_wakeup_thread(mddev->thread); | ||
| 4106 | 4128 | ||
| 4107 | spin_lock_irq(&conf->device_lock); | 4129 | spin_lock_irq(&conf->device_lock); |
| 4108 | remaining = raid5_dec_bi_phys_segments(bi); | 4130 | remaining = raid5_dec_bi_phys_segments(bi); |
| @@ -4823,6 +4845,7 @@ static struct r5conf *setup_conf(struct mddev *mddev) | |||
| 4823 | int raid_disk, memory, max_disks; | 4845 | int raid_disk, memory, max_disks; |
| 4824 | struct md_rdev *rdev; | 4846 | struct md_rdev *rdev; |
| 4825 | struct disk_info *disk; | 4847 | struct disk_info *disk; |
| 4848 | char pers_name[6]; | ||
| 4826 | 4849 | ||
| 4827 | if (mddev->new_level != 5 | 4850 | if (mddev->new_level != 5 |
| 4828 | && mddev->new_level != 4 | 4851 | && mddev->new_level != 4 |
| @@ -4946,7 +4969,8 @@ static struct r5conf *setup_conf(struct mddev *mddev) | |||
| 4946 | printk(KERN_INFO "md/raid:%s: allocated %dkB\n", | 4969 | printk(KERN_INFO "md/raid:%s: allocated %dkB\n", |
| 4947 | mdname(mddev), memory); | 4970 | mdname(mddev), memory); |
| 4948 | 4971 | ||
| 4949 | conf->thread = md_register_thread(raid5d, mddev, NULL); | 4972 | sprintf(pers_name, "raid%d", mddev->new_level); |
| 4973 | conf->thread = md_register_thread(raid5d, mddev, pers_name); | ||
| 4950 | if (!conf->thread) { | 4974 | if (!conf->thread) { |
| 4951 | printk(KERN_ERR | 4975 | printk(KERN_ERR |
| 4952 | "md/raid:%s: couldn't allocate thread.\n", | 4976 | "md/raid:%s: couldn't allocate thread.\n", |
| @@ -5465,10 +5489,9 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev) | |||
| 5465 | if (rdev->saved_raid_disk >= 0 && | 5489 | if (rdev->saved_raid_disk >= 0 && |
| 5466 | rdev->saved_raid_disk >= first && | 5490 | rdev->saved_raid_disk >= first && |
| 5467 | conf->disks[rdev->saved_raid_disk].rdev == NULL) | 5491 | conf->disks[rdev->saved_raid_disk].rdev == NULL) |
| 5468 | disk = rdev->saved_raid_disk; | 5492 | first = rdev->saved_raid_disk; |
| 5469 | else | 5493 | |
| 5470 | disk = first; | 5494 | for (disk = first; disk <= last; disk++) { |
| 5471 | for ( ; disk <= last ; disk++) { | ||
| 5472 | p = conf->disks + disk; | 5495 | p = conf->disks + disk; |
| 5473 | if (p->rdev == NULL) { | 5496 | if (p->rdev == NULL) { |
| 5474 | clear_bit(In_sync, &rdev->flags); | 5497 | clear_bit(In_sync, &rdev->flags); |
| @@ -5477,8 +5500,11 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev) | |||
| 5477 | if (rdev->saved_raid_disk != disk) | 5500 | if (rdev->saved_raid_disk != disk) |
| 5478 | conf->fullsync = 1; | 5501 | conf->fullsync = 1; |
| 5479 | rcu_assign_pointer(p->rdev, rdev); | 5502 | rcu_assign_pointer(p->rdev, rdev); |
| 5480 | break; | 5503 | goto out; |
| 5481 | } | 5504 | } |
| 5505 | } | ||
| 5506 | for (disk = first; disk <= last; disk++) { | ||
| 5507 | p = conf->disks + disk; | ||
| 5482 | if (test_bit(WantReplacement, &p->rdev->flags) && | 5508 | if (test_bit(WantReplacement, &p->rdev->flags) && |
| 5483 | p->replacement == NULL) { | 5509 | p->replacement == NULL) { |
| 5484 | clear_bit(In_sync, &rdev->flags); | 5510 | clear_bit(In_sync, &rdev->flags); |
| @@ -5490,6 +5516,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev) | |||
| 5490 | break; | 5516 | break; |
| 5491 | } | 5517 | } |
| 5492 | } | 5518 | } |
| 5519 | out: | ||
| 5493 | print_raid5_conf(conf); | 5520 | print_raid5_conf(conf); |
| 5494 | return err; | 5521 | return err; |
| 5495 | } | 5522 | } |
