about | summary | refs | log | tree | commit | diff | stats
path: root/drivers/md/raid5.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r-- drivers/md/raid5.c 246
1 files changed, 128 insertions, 118 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 970eb03ec6c0..6d3a2a09cd90 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2696,13 +2696,136 @@ static int make_request(request_queue_t *q, struct bio * bi)
2696 return 0; 2696 return 0;
2697} 2697}
2698 2698
2699/* FIXME go_faster isn't used */ 2699static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped)
2700static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster)
2701{ 2700{
2701 /* reshaping is quite different to recovery/resync so it is
2702 * handled quite separately ... here.
2703 *
2704 * On each call to sync_request, we gather one chunk worth of
2705 * destination stripes and flag them as expanding.
2706 * Then we find all the source stripes and request reads.
2707 * As the reads complete, handle_stripe will copy the data
2708 * into the destination stripe and release that stripe.
2709 */
2702 raid5_conf_t *conf = (raid5_conf_t *) mddev->private; 2710 raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
2703 struct stripe_head *sh; 2711 struct stripe_head *sh;
2704 int pd_idx; 2712 int pd_idx;
2705 sector_t first_sector, last_sector; 2713 sector_t first_sector, last_sector;
2714 int raid_disks;
2715 int data_disks;
2716 int i;
2717 int dd_idx;
2718 sector_t writepos, safepos, gap;
2719
2720 if (sector_nr == 0 &&
2721 conf->expand_progress != 0) {
2722 /* restarting in the middle, skip the initial sectors */
2723 sector_nr = conf->expand_progress;
2724 sector_div(sector_nr, conf->raid_disks-1);
2725 *skipped = 1;
2726 return sector_nr;
2727 }
2728
2729 /* we update the metadata when there is more than 3Meg
2730 * in the block range (that is rather arbitrary, should
2731 * probably be time based) or when the data about to be
2732 * copied would over-write the source of the data at
2733 * the front of the range.
2734 * i.e. one new_stripe forward from expand_progress new_maps
2735 * to after where expand_lo old_maps to
2736 */
2737 writepos = conf->expand_progress +
2738 conf->chunk_size/512*(conf->raid_disks-1);
2739 sector_div(writepos, conf->raid_disks-1);
2740 safepos = conf->expand_lo;
2741 sector_div(safepos, conf->previous_raid_disks-1);
2742 gap = conf->expand_progress - conf->expand_lo;
2743
2744 if (writepos >= safepos ||
2745 gap > (conf->raid_disks-1)*3000*2 /*3Meg*/) {
2746 /* Cannot proceed until we've updated the superblock... */
2747 wait_event(conf->wait_for_overlap,
2748 atomic_read(&conf->reshape_stripes)==0);
2749 mddev->reshape_position = conf->expand_progress;
2750 mddev->sb_dirty = 1;
2751 md_wakeup_thread(mddev->thread);
2752 wait_event(mddev->sb_wait, mddev->sb_dirty == 0 ||
2753 kthread_should_stop());
2754 spin_lock_irq(&conf->device_lock);
2755 conf->expand_lo = mddev->reshape_position;
2756 spin_unlock_irq(&conf->device_lock);
2757 wake_up(&conf->wait_for_overlap);
2758 }
2759
2760 for (i=0; i < conf->chunk_size/512; i+= STRIPE_SECTORS) {
2761 int j;
2762 int skipped = 0;
2763 pd_idx = stripe_to_pdidx(sector_nr+i, conf, conf->raid_disks);
2764 sh = get_active_stripe(conf, sector_nr+i,
2765 conf->raid_disks, pd_idx, 0);
2766 set_bit(STRIPE_EXPANDING, &sh->state);
2767 atomic_inc(&conf->reshape_stripes);
2768 /* If any of this stripe is beyond the end of the old
2769 * array, then we need to zero those blocks
2770 */
2771 for (j=sh->disks; j--;) {
2772 sector_t s;
2773 if (j == sh->pd_idx)
2774 continue;
2775 s = compute_blocknr(sh, j);
2776 if (s < (mddev->array_size<<1)) {
2777 skipped = 1;
2778 continue;
2779 }
2780 memset(page_address(sh->dev[j].page), 0, STRIPE_SIZE);
2781 set_bit(R5_Expanded, &sh->dev[j].flags);
2782 set_bit(R5_UPTODATE, &sh->dev[j].flags);
2783 }
2784 if (!skipped) {
2785 set_bit(STRIPE_EXPAND_READY, &sh->state);
2786 set_bit(STRIPE_HANDLE, &sh->state);
2787 }
2788 release_stripe(sh);
2789 }
2790 spin_lock_irq(&conf->device_lock);
2791 conf->expand_progress = (sector_nr + i)*(conf->raid_disks-1);
2792 spin_unlock_irq(&conf->device_lock);
2793 /* Ok, those stripe are ready. We can start scheduling
2794 * reads on the source stripes.
2795 * The source stripes are determined by mapping the first and last
2796 * block on the destination stripes.
2797 */
2798 raid_disks = conf->previous_raid_disks;
2799 data_disks = raid_disks - 1;
2800 first_sector =
2801 raid5_compute_sector(sector_nr*(conf->raid_disks-1),
2802 raid_disks, data_disks,
2803 &dd_idx, &pd_idx, conf);
2804 last_sector =
2805 raid5_compute_sector((sector_nr+conf->chunk_size/512)
2806 *(conf->raid_disks-1) -1,
2807 raid_disks, data_disks,
2808 &dd_idx, &pd_idx, conf);
2809 if (last_sector >= (mddev->size<<1))
2810 last_sector = (mddev->size<<1)-1;
2811 while (first_sector <= last_sector) {
2812 pd_idx = stripe_to_pdidx(first_sector, conf, conf->previous_raid_disks);
2813 sh = get_active_stripe(conf, first_sector,
2814 conf->previous_raid_disks, pd_idx, 0);
2815 set_bit(STRIPE_EXPAND_SOURCE, &sh->state);
2816 set_bit(STRIPE_HANDLE, &sh->state);
2817 release_stripe(sh);
2818 first_sector += STRIPE_SECTORS;
2819 }
2820 return conf->chunk_size>>9;
2821}
2822
2823/* FIXME go_faster isn't used */
2824static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster)
2825{
2826 raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
2827 struct stripe_head *sh;
2828 int pd_idx;
2706 int raid_disks = conf->raid_disks; 2829 int raid_disks = conf->raid_disks;
2707 int data_disks = raid_disks - conf->max_degraded; 2830 int data_disks = raid_disks - conf->max_degraded;
2708 sector_t max_sector = mddev->size << 1; 2831 sector_t max_sector = mddev->size << 1;
@@ -2728,122 +2851,9 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
2728 return 0; 2851 return 0;
2729 } 2852 }
2730 2853
2731 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) { 2854 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
2732 /* reshaping is quite different to recovery/resync so it is 2855 return reshape_request(mddev, sector_nr, skipped);
2733 * handled quite separately ... here. 2856
2734 *
2735 * On each call to sync_request, we gather one chunk worth of
2736 * destination stripes and flag them as expanding.
2737 * Then we find all the source stripes and request reads.
2738 * As the reads complete, handle_stripe will copy the data
2739 * into the destination stripe and release that stripe.
2740 */
2741 int i;
2742 int dd_idx;
2743 sector_t writepos, safepos, gap;
2744
2745 if (sector_nr == 0 &&
2746 conf->expand_progress != 0) {
2747 /* restarting in the middle, skip the initial sectors */
2748 sector_nr = conf->expand_progress;
2749 sector_div(sector_nr, conf->raid_disks-1);
2750 *skipped = 1;
2751 return sector_nr;
2752 }
2753
2754 /* we update the metadata when there is more than 3Meg
2755 * in the block range (that is rather arbitrary, should
2756 * probably be time based) or when the data about to be
2757 * copied would over-write the source of the data at
2758 * the front of the range.
2759 * i.e. one new_stripe forward from expand_progress new_maps
2760 * to after where expand_lo old_maps to
2761 */
2762 writepos = conf->expand_progress +
2763 conf->chunk_size/512*(conf->raid_disks-1);
2764 sector_div(writepos, conf->raid_disks-1);
2765 safepos = conf->expand_lo;
2766 sector_div(safepos, conf->previous_raid_disks-1);
2767 gap = conf->expand_progress - conf->expand_lo;
2768
2769 if (writepos >= safepos ||
2770 gap > (conf->raid_disks-1)*3000*2 /*3Meg*/) {
2771 /* Cannot proceed until we've updated the superblock... */
2772 wait_event(conf->wait_for_overlap,
2773 atomic_read(&conf->reshape_stripes)==0);
2774 mddev->reshape_position = conf->expand_progress;
2775 mddev->sb_dirty = 1;
2776 md_wakeup_thread(mddev->thread);
2777 wait_event(mddev->sb_wait, mddev->sb_dirty == 0 ||
2778 kthread_should_stop());
2779 spin_lock_irq(&conf->device_lock);
2780 conf->expand_lo = mddev->reshape_position;
2781 spin_unlock_irq(&conf->device_lock);
2782 wake_up(&conf->wait_for_overlap);
2783 }
2784
2785 for (i=0; i < conf->chunk_size/512; i+= STRIPE_SECTORS) {
2786 int j;
2787 int skipped = 0;
2788 pd_idx = stripe_to_pdidx(sector_nr+i, conf, conf->raid_disks);
2789 sh = get_active_stripe(conf, sector_nr+i,
2790 conf->raid_disks, pd_idx, 0);
2791 set_bit(STRIPE_EXPANDING, &sh->state);
2792 atomic_inc(&conf->reshape_stripes);
2793 /* If any of this stripe is beyond the end of the old
2794 * array, then we need to zero those blocks
2795 */
2796 for (j=sh->disks; j--;) {
2797 sector_t s;
2798 if (j == sh->pd_idx)
2799 continue;
2800 s = compute_blocknr(sh, j);
2801 if (s < (mddev->array_size<<1)) {
2802 skipped = 1;
2803 continue;
2804 }
2805 memset(page_address(sh->dev[j].page), 0, STRIPE_SIZE);
2806 set_bit(R5_Expanded, &sh->dev[j].flags);
2807 set_bit(R5_UPTODATE, &sh->dev[j].flags);
2808 }
2809 if (!skipped) {
2810 set_bit(STRIPE_EXPAND_READY, &sh->state);
2811 set_bit(STRIPE_HANDLE, &sh->state);
2812 }
2813 release_stripe(sh);
2814 }
2815 spin_lock_irq(&conf->device_lock);
2816 conf->expand_progress = (sector_nr + i)*(conf->raid_disks-1);
2817 spin_unlock_irq(&conf->device_lock);
2818 /* Ok, those stripe are ready. We can start scheduling
2819 * reads on the source stripes.
2820 * The source stripes are determined by mapping the first and last
2821 * block on the destination stripes.
2822 */
2823 raid_disks = conf->previous_raid_disks;
2824 data_disks = raid_disks - 1;
2825 first_sector =
2826 raid5_compute_sector(sector_nr*(conf->raid_disks-1),
2827 raid_disks, data_disks,
2828 &dd_idx, &pd_idx, conf);
2829 last_sector =
2830 raid5_compute_sector((sector_nr+conf->chunk_size/512)
2831 *(conf->raid_disks-1) -1,
2832 raid_disks, data_disks,
2833 &dd_idx, &pd_idx, conf);
2834 if (last_sector >= (mddev->size<<1))
2835 last_sector = (mddev->size<<1)-1;
2836 while (first_sector <= last_sector) {
2837 pd_idx = stripe_to_pdidx(first_sector, conf, conf->previous_raid_disks);
2838 sh = get_active_stripe(conf, first_sector,
2839 conf->previous_raid_disks, pd_idx, 0);
2840 set_bit(STRIPE_EXPAND_SOURCE, &sh->state);
2841 set_bit(STRIPE_HANDLE, &sh->state);
2842 release_stripe(sh);
2843 first_sector += STRIPE_SECTORS;
2844 }
2845 return conf->chunk_size>>9;
2846 }
2847 /* if there is too many failed drives and we are trying 2857 /* if there is too many failed drives and we are trying
2848 * to resync, then assert that we are finished, because there is 2858 * to resync, then assert that we are finished, because there is
2849 * nothing we can do. 2859 * nothing we can do.