Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r--	drivers/md/raid5.c | 136
1 file changed, 84 insertions(+), 52 deletions(-)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index d29215d966da..15348c393b5d 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -50,6 +50,7 @@
 #include <linux/async.h>
 #include <linux/seq_file.h>
 #include <linux/cpu.h>
+#include <linux/slab.h>
 #include "md.h"
 #include "raid5.h"
 #include "bitmap.h"
@@ -1526,7 +1527,7 @@ static void raid5_end_read_request(struct bio * bi, int error)
 
 		clear_bit(R5_UPTODATE, &sh->dev[i].flags);
 		atomic_inc(&rdev->read_errors);
-		if (conf->mddev->degraded)
+		if (conf->mddev->degraded >= conf->max_degraded)
 			printk_rl(KERN_WARNING
 				  "raid5:%s: read error not correctable "
 				  "(sector %llu on %s).\n",
@@ -1649,8 +1650,8 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector,
 				     int previous, int *dd_idx,
 				     struct stripe_head *sh)
 {
-	long stripe;
-	unsigned long chunk_number;
+	sector_t stripe, stripe2;
+	sector_t chunk_number;
 	unsigned int chunk_offset;
 	int pd_idx, qd_idx;
 	int ddf_layout = 0;
@@ -1670,18 +1671,13 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector,
 	 */
 	chunk_offset = sector_div(r_sector, sectors_per_chunk);
 	chunk_number = r_sector;
-	BUG_ON(r_sector != chunk_number);
 
 	/*
 	 * Compute the stripe number
 	 */
-	stripe = chunk_number / data_disks;
-
-	/*
-	 * Compute the data disk and parity disk indexes inside the stripe
-	 */
-	*dd_idx = chunk_number % data_disks;
-
+	stripe = chunk_number;
+	*dd_idx = sector_div(stripe, data_disks);
+	stripe2 = stripe;
 	/*
 	 * Select the parity disk based on the user selected algorithm.
 	 */
@@ -1693,21 +1689,21 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector,
 	case 5:
 		switch (algorithm) {
 		case ALGORITHM_LEFT_ASYMMETRIC:
-			pd_idx = data_disks - stripe % raid_disks;
+			pd_idx = data_disks - sector_div(stripe2, raid_disks);
 			if (*dd_idx >= pd_idx)
 				(*dd_idx)++;
 			break;
 		case ALGORITHM_RIGHT_ASYMMETRIC:
-			pd_idx = stripe % raid_disks;
+			pd_idx = sector_div(stripe2, raid_disks);
 			if (*dd_idx >= pd_idx)
 				(*dd_idx)++;
 			break;
 		case ALGORITHM_LEFT_SYMMETRIC:
-			pd_idx = data_disks - stripe % raid_disks;
+			pd_idx = data_disks - sector_div(stripe2, raid_disks);
 			*dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks;
 			break;
 		case ALGORITHM_RIGHT_SYMMETRIC:
-			pd_idx = stripe % raid_disks;
+			pd_idx = sector_div(stripe2, raid_disks);
 			*dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks;
 			break;
 		case ALGORITHM_PARITY_0:
@@ -1727,7 +1723,7 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector,
 
 		switch (algorithm) {
 		case ALGORITHM_LEFT_ASYMMETRIC:
-			pd_idx = raid_disks - 1 - (stripe % raid_disks);
+			pd_idx = raid_disks - 1 - sector_div(stripe2, raid_disks);
 			qd_idx = pd_idx + 1;
 			if (pd_idx == raid_disks-1) {
 				(*dd_idx)++;	/* Q D D D P */
@@ -1736,7 +1732,7 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector,
 				(*dd_idx) += 2; /* D D P Q D */
 			break;
 		case ALGORITHM_RIGHT_ASYMMETRIC:
-			pd_idx = stripe % raid_disks;
+			pd_idx = sector_div(stripe2, raid_disks);
 			qd_idx = pd_idx + 1;
 			if (pd_idx == raid_disks-1) {
 				(*dd_idx)++;	/* Q D D D P */
@@ -1745,12 +1741,12 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector,
 				(*dd_idx) += 2; /* D D P Q D */
 			break;
 		case ALGORITHM_LEFT_SYMMETRIC:
-			pd_idx = raid_disks - 1 - (stripe % raid_disks);
+			pd_idx = raid_disks - 1 - sector_div(stripe2, raid_disks);
 			qd_idx = (pd_idx + 1) % raid_disks;
 			*dd_idx = (pd_idx + 2 + *dd_idx) % raid_disks;
 			break;
 		case ALGORITHM_RIGHT_SYMMETRIC:
-			pd_idx = stripe % raid_disks;
+			pd_idx = sector_div(stripe2, raid_disks);
 			qd_idx = (pd_idx + 1) % raid_disks;
 			*dd_idx = (pd_idx + 2 + *dd_idx) % raid_disks;
 			break;
@@ -1769,7 +1765,7 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector,
 			/* Exactly the same as RIGHT_ASYMMETRIC, but or
 			 * of blocks for computing Q is different.
 			 */
-			pd_idx = stripe % raid_disks;
+			pd_idx = sector_div(stripe2, raid_disks);
 			qd_idx = pd_idx + 1;
 			if (pd_idx == raid_disks-1) {
 				(*dd_idx)++;	/* Q D D D P */
@@ -1784,7 +1780,8 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector,
 			 * D D D P Q rather than
 			 * Q D D D P
 			 */
-			pd_idx = raid_disks - 1 - ((stripe + 1) % raid_disks);
+			stripe2 += 1;
+			pd_idx = raid_disks - 1 - sector_div(stripe2, raid_disks);
 			qd_idx = pd_idx + 1;
 			if (pd_idx == raid_disks-1) {
 				(*dd_idx)++;	/* Q D D D P */
@@ -1796,7 +1793,7 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector,
 
 		case ALGORITHM_ROTATING_N_CONTINUE:
 			/* Same as left_symmetric but Q is before P */
-			pd_idx = raid_disks - 1 - (stripe % raid_disks);
+			pd_idx = raid_disks - 1 - sector_div(stripe2, raid_disks);
 			qd_idx = (pd_idx + raid_disks - 1) % raid_disks;
 			*dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks;
 			ddf_layout = 1;
@@ -1804,27 +1801,27 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector,
 
 		case ALGORITHM_LEFT_ASYMMETRIC_6:
 			/* RAID5 left_asymmetric, with Q on last device */
-			pd_idx = data_disks - stripe % (raid_disks-1);
+			pd_idx = data_disks - sector_div(stripe2, raid_disks-1);
 			if (*dd_idx >= pd_idx)
 				(*dd_idx)++;
 			qd_idx = raid_disks - 1;
 			break;
 
 		case ALGORITHM_RIGHT_ASYMMETRIC_6:
-			pd_idx = stripe % (raid_disks-1);
+			pd_idx = sector_div(stripe2, raid_disks-1);
 			if (*dd_idx >= pd_idx)
 				(*dd_idx)++;
 			qd_idx = raid_disks - 1;
 			break;
 
 		case ALGORITHM_LEFT_SYMMETRIC_6:
-			pd_idx = data_disks - stripe % (raid_disks-1);
+			pd_idx = data_disks - sector_div(stripe2, raid_disks-1);
 			*dd_idx = (pd_idx + 1 + *dd_idx) % (raid_disks-1);
 			qd_idx = raid_disks - 1;
 			break;
 
 		case ALGORITHM_RIGHT_SYMMETRIC_6:
-			pd_idx = stripe % (raid_disks-1);
+			pd_idx = sector_div(stripe2, raid_disks-1);
 			*dd_idx = (pd_idx + 1 + *dd_idx) % (raid_disks-1);
 			qd_idx = raid_disks - 1;
 			break;
@@ -1869,14 +1866,14 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous)
 					 : conf->algorithm;
 	sector_t stripe;
 	int chunk_offset;
-	int chunk_number, dummy1, dd_idx = i;
+	sector_t chunk_number;
+	int dummy1, dd_idx = i;
 	sector_t r_sector;
 	struct stripe_head sh2;
 
 
 	chunk_offset = sector_div(new_sector, sectors_per_chunk);
 	stripe = new_sector;
-	BUG_ON(new_sector != stripe);
 
 	if (i == sh->pd_idx)
 		return 0;
@@ -1969,7 +1966,7 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous)
 	}
 
 	chunk_number = stripe * data_disks + i;
-	r_sector = (sector_t)chunk_number * sectors_per_chunk + chunk_offset;
+	r_sector = chunk_number * sectors_per_chunk + chunk_offset;
 
 	check = raid5_compute_sector(conf, r_sector,
 				     previous, &dummy1, &sh2);
@@ -2947,6 +2944,7 @@ static void handle_stripe5(struct stripe_head *sh)
 	struct r5dev *dev;
 	mdk_rdev_t *blocked_rdev = NULL;
 	int prexor;
+	int dec_preread_active = 0;
 
 	memset(&s, 0, sizeof(s));
 	pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d check:%d "
@@ -3096,12 +3094,8 @@ static void handle_stripe5(struct stripe_head *sh)
 				set_bit(STRIPE_INSYNC, &sh->state);
 			}
 		}
-		if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
-			atomic_dec(&conf->preread_active_stripes);
-			if (atomic_read(&conf->preread_active_stripes) <
-				IO_THRESHOLD)
-				md_wakeup_thread(conf->mddev->thread);
-		}
+		if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
+			dec_preread_active = 1;
 	}
 
 	/* Now to consider new write requests and what else, if anything
@@ -3208,6 +3202,16 @@ static void handle_stripe5(struct stripe_head *sh)
 
 	ops_run_io(sh, &s);
 
+	if (dec_preread_active) {
+		/* We delay this until after ops_run_io so that if make_request
+		 * is waiting on a barrier, it won't continue until the writes
+		 * have actually been submitted.
+		 */
+		atomic_dec(&conf->preread_active_stripes);
+		if (atomic_read(&conf->preread_active_stripes) <
+		    IO_THRESHOLD)
+			md_wakeup_thread(conf->mddev->thread);
+	}
 	return_io(return_bi);
 }
 
@@ -3221,6 +3225,7 @@ static void handle_stripe6(struct stripe_head *sh)
 	struct r6_state r6s;
 	struct r5dev *dev, *pdev, *qdev;
 	mdk_rdev_t *blocked_rdev = NULL;
+	int dec_preread_active = 0;
 
 	pr_debug("handling stripe %llu, state=%#lx cnt=%d, "
 		"pd_idx=%d, qd_idx=%d\n, check:%d, reconstruct:%d\n",
@@ -3358,7 +3363,6 @@ static void handle_stripe6(struct stripe_head *sh)
 	 * completed
 	 */
 	if (sh->reconstruct_state == reconstruct_state_drain_result) {
-		int qd_idx = sh->qd_idx;
 
 		sh->reconstruct_state = reconstruct_state_idle;
 		/* All the 'written' buffers and the parity blocks are ready to
@@ -3380,12 +3384,8 @@ static void handle_stripe6(struct stripe_head *sh)
 				set_bit(STRIPE_INSYNC, &sh->state);
 			}
 		}
-		if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
-			atomic_dec(&conf->preread_active_stripes);
-			if (atomic_read(&conf->preread_active_stripes) <
-				IO_THRESHOLD)
-				md_wakeup_thread(conf->mddev->thread);
-		}
+		if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
+			dec_preread_active = 1;
 	}
 
 	/* Now to consider new write requests and what else, if anything
@@ -3494,6 +3494,18 @@ static void handle_stripe6(struct stripe_head *sh)
 
 	ops_run_io(sh, &s);
 
+
+	if (dec_preread_active) {
+		/* We delay this until after ops_run_io so that if make_request
+		 * is waiting on a barrier, it won't continue until the writes
+		 * have actually been submitted.
+		 */
+		atomic_dec(&conf->preread_active_stripes);
+		if (atomic_read(&conf->preread_active_stripes) <
+		    IO_THRESHOLD)
+			md_wakeup_thread(conf->mddev->thread);
+	}
+
 	return_io(return_bi);
 }
 
@@ -3724,7 +3736,7 @@ static int bio_fits_rdev(struct bio *bi)
 	if ((bi->bi_size>>9) > queue_max_sectors(q))
 		return 0;
 	blk_recount_segments(q, bi);
-	if (bi->bi_phys_segments > queue_max_phys_segments(q))
+	if (bi->bi_phys_segments > queue_max_segments(q))
 		return 0;
 
 	if (q->merge_bvec_fn)
@@ -3741,7 +3753,7 @@ static int chunk_aligned_read(struct request_queue *q, struct bio * raid_bio)
 {
 	mddev_t *mddev = q->queuedata;
 	raid5_conf_t *conf = mddev->private;
-	unsigned int dd_idx;
+	int dd_idx;
 	struct bio* align_bi;
 	mdk_rdev_t *rdev;
 
@@ -3866,7 +3878,13 @@ static int make_request(struct request_queue *q, struct bio * bi)
 	int cpu, remaining;
 
 	if (unlikely(bio_rw_flagged(bi, BIO_RW_BARRIER))) {
-		bio_endio(bi, -EOPNOTSUPP);
+		/* Drain all pending writes.  We only really need
+		 * to ensure they have been submitted, but this is
+		 * easier.
+		 */
+		mddev->pers->quiesce(mddev, 1);
+		mddev->pers->quiesce(mddev, 0);
+		md_barrier_request(mddev, bi);
 		return 0;
 	}
 
@@ -3990,6 +4008,9 @@ static int make_request(struct request_queue *q, struct bio * bi)
 			finish_wait(&conf->wait_for_overlap, &w);
 			set_bit(STRIPE_HANDLE, &sh->state);
 			clear_bit(STRIPE_DELAYED, &sh->state);
+			if (mddev->barrier &&
+			    !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
+				atomic_inc(&conf->preread_active_stripes);
 			release_stripe(sh);
 		} else {
 			/* cannot get stripe for read-ahead, just give-up */
@@ -4009,6 +4030,14 @@ static int make_request(struct request_queue *q, struct bio * bi)
 
 		bio_endio(bi, 0);
 	}
+
+	if (mddev->barrier) {
+		/* We need to wait for the stripes to all be handled.
+		 * So: wait for preread_active_stripes to drop to 0.
+		 */
+		wait_event(mddev->thread->wqueue,
+			   atomic_read(&conf->preread_active_stripes) == 0);
+	}
 	return 0;
 }
 
@@ -4648,7 +4677,7 @@ static int raid5_alloc_percpu(raid5_conf_t *conf)
 {
 	unsigned long cpu;
 	struct page *spare_page;
-	struct raid5_percpu *allcpus;
+	struct raid5_percpu __percpu *allcpus;
 	void *scribble;
 	int err;
 
@@ -5104,9 +5133,8 @@ static int stop(mddev_t *mddev)
 	mddev->thread = NULL;
 	mddev->queue->backing_dev_info.congested_fn = NULL;
 	blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
-	sysfs_remove_group(&mddev->kobj, &raid5_attrs_group);
 	free_conf(conf);
-	mddev->private = NULL;
+	mddev->private = &raid5_attrs_group;
 	return 0;
 }
 
@@ -5432,11 +5460,11 @@ static int raid5_start_reshape(mddev_t *mddev)
 		    !test_bit(Faulty, &rdev->flags)) {
 			if (raid5_add_disk(mddev, rdev) == 0) {
 				char nm[20];
-				if (rdev->raid_disk >= conf->previous_raid_disks)
+				if (rdev->raid_disk >= conf->previous_raid_disks) {
 					set_bit(In_sync, &rdev->flags);
-				else
+					added_devices++;
+				} else
 					rdev->recovery_offset = 0;
-				added_devices++;
 				sprintf(nm, "rd%d", rdev->raid_disk);
 				if (sysfs_create_link(&mddev->kobj,
 						      &rdev->kobj, nm))
@@ -5448,9 +5476,12 @@ static int raid5_start_reshape(mddev_t *mddev)
 			break;
 		}
 
+	/* When a reshape changes the number of devices, ->degraded
+	 * is measured against the larger of the pre and post number of
+	 * devices.*/
 	if (mddev->delta_disks > 0) {
 		spin_lock_irqsave(&conf->device_lock, flags);
-		mddev->degraded = (conf->raid_disks - conf->previous_raid_disks)
+		mddev->degraded += (conf->raid_disks - conf->previous_raid_disks)
 			- added_devices;
 		spin_unlock_irqrestore(&conf->device_lock, flags);
 	}
@@ -5860,6 +5891,7 @@ static void raid5_exit(void)
 module_init(raid5_init);
 module_exit(raid5_exit);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("RAID4/5/6 (striping with parity) personality for MD");
MODULE_ALIAS("md-personality-4"); /* RAID5 */
 MODULE_ALIAS("md-raid5");
 MODULE_ALIAS("md-raid4");
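
Editor's note on the recurring conversion in raid5_compute_sector() above: every `stripe % raid_disks` becomes `sector_div(stripe2, raid_disks)` because the stripe number is now a 64-bit sector_t, and a plain C `%` on a 64-bit value would need compiler helper routines on 32-bit kernels. sector_div() instead divides its first argument in place and returns the remainder, which is why the patch takes the `stripe2` scratch copy before the switch (only one case runs per call, so one copy suffices). The sketch below is a minimal user-space illustration of that calling convention only; the `sector_div` macro defined here is a hypothetical stand-in with the same semantics, not the kernel's div64 implementation.

/* Minimal user-space sketch (not kernel code) of the sector_div()
 * calling convention: divide the 64-bit variable in place and return
 * the 32-bit remainder.  Uses a GCC statement expression, like the
 * kernel macro it imitates. */
#include <stdint.h>
#include <stdio.h>

typedef uint64_t sector_t;

#define sector_div(n, base) ({				\
	uint32_t _base = (base);			\
	uint32_t _rem = (uint32_t)((n) % _base);	\
	(n) /= _base;					\
	_rem;						\
})

int main(void)
{
	sector_t stripe = 10737418240ULL;	/* stripe number wider than 32 bits */
	int raid_disks = 6, data_disks = raid_disks - 1;

	/* sector_div() destroys its dividend, so work on a copy, exactly
	 * as the patch does with 'stripe2'. */
	sector_t stripe2 = stripe;
	int pd_idx = data_disks - (int)sector_div(stripe2, raid_disks);

	printf("pd_idx = %d, stripe2 = %llu\n",
	       pd_idx, (unsigned long long)stripe2);
	return 0;
}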