diff options
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r-- | drivers/md/raid5.c | 140 |
1 files changed, 120 insertions, 20 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index b29135acb1d9..20ae32d67e21 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/raid/raid5.h> | 22 | #include <linux/raid/raid5.h> |
23 | #include <linux/highmem.h> | 23 | #include <linux/highmem.h> |
24 | #include <linux/bitops.h> | 24 | #include <linux/bitops.h> |
25 | #include <linux/kthread.h> | ||
25 | #include <asm/atomic.h> | 26 | #include <asm/atomic.h> |
26 | 27 | ||
27 | #include <linux/raid/bitmap.h> | 28 | #include <linux/raid/bitmap.h> |
@@ -1504,6 +1505,7 @@ static void handle_stripe(struct stripe_head *sh) | |||
1504 | clear_bit(STRIPE_EXPANDING, &sh->state); | 1505 | clear_bit(STRIPE_EXPANDING, &sh->state); |
1505 | } else if (expanded) { | 1506 | } else if (expanded) { |
1506 | clear_bit(STRIPE_EXPAND_READY, &sh->state); | 1507 | clear_bit(STRIPE_EXPAND_READY, &sh->state); |
1508 | atomic_dec(&conf->reshape_stripes); | ||
1507 | wake_up(&conf->wait_for_overlap); | 1509 | wake_up(&conf->wait_for_overlap); |
1508 | md_done_sync(conf->mddev, STRIPE_SECTORS, 1); | 1510 | md_done_sync(conf->mddev, STRIPE_SECTORS, 1); |
1509 | } | 1511 | } |
@@ -1875,6 +1877,26 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
1875 | */ | 1877 | */ |
1876 | int i; | 1878 | int i; |
1877 | int dd_idx; | 1879 | int dd_idx; |
1880 | |||
1881 | if (sector_nr == 0 && | ||
1882 | conf->expand_progress != 0) { | ||
1883 | /* restarting in the middle, skip the initial sectors */ | ||
1884 | sector_nr = conf->expand_progress; | ||
1885 | sector_div(sector_nr, conf->raid_disks-1); | ||
1886 | *skipped = 1; | ||
1887 | return sector_nr; | ||
1888 | } | ||
1889 | |||
1890 | /* Cannot proceed until we've updated the superblock... */ | ||
1891 | wait_event(conf->wait_for_overlap, | ||
1892 | atomic_read(&conf->reshape_stripes)==0); | ||
1893 | mddev->reshape_position = conf->expand_progress; | ||
1894 | |||
1895 | mddev->sb_dirty = 1; | ||
1896 | md_wakeup_thread(mddev->thread); | ||
1897 | wait_event(mddev->sb_wait, mddev->sb_dirty == 0 || | ||
1898 | kthread_should_stop()); | ||
1899 | |||
1878 | for (i=0; i < conf->chunk_size/512; i+= STRIPE_SECTORS) { | 1900 | for (i=0; i < conf->chunk_size/512; i+= STRIPE_SECTORS) { |
1879 | int j; | 1901 | int j; |
1880 | int skipped = 0; | 1902 | int skipped = 0; |
@@ -1882,6 +1904,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
1882 | sh = get_active_stripe(conf, sector_nr+i, | 1904 | sh = get_active_stripe(conf, sector_nr+i, |
1883 | conf->raid_disks, pd_idx, 0); | 1905 | conf->raid_disks, pd_idx, 0); |
1884 | set_bit(STRIPE_EXPANDING, &sh->state); | 1906 | set_bit(STRIPE_EXPANDING, &sh->state); |
1907 | atomic_inc(&conf->reshape_stripes); | ||
1885 | /* If any of this stripe is beyond the end of the old | 1908 | /* If any of this stripe is beyond the end of the old |
1886 | * array, then we need to zero those blocks | 1909 | * array, then we need to zero those blocks |
1887 | */ | 1910 | */ |
@@ -2121,10 +2144,61 @@ static int run(mddev_t *mddev) | |||
2121 | return -EIO; | 2144 | return -EIO; |
2122 | } | 2145 | } |
2123 | 2146 | ||
2147 | if (mddev->reshape_position != MaxSector) { | ||
2148 | /* Check that we can continue the reshape. | ||
2149 | * Currently only disks can change, it must | ||
2150 | * increase, and we must be past the point where | ||
2151 | * a stripe over-writes itself | ||
2152 | */ | ||
2153 | sector_t here_new, here_old; | ||
2154 | int old_disks; | ||
2155 | |||
2156 | if (mddev->new_level != mddev->level || | ||
2157 | mddev->new_layout != mddev->layout || | ||
2158 | mddev->new_chunk != mddev->chunk_size) { | ||
2159 | printk(KERN_ERR "raid5: %s: unsupported reshape required - aborting.\n", | ||
2160 | mdname(mddev)); | ||
2161 | return -EINVAL; | ||
2162 | } | ||
2163 | if (mddev->delta_disks <= 0) { | ||
2164 | printk(KERN_ERR "raid5: %s: unsupported reshape (reduce disks) required - aborting.\n", | ||
2165 | mdname(mddev)); | ||
2166 | return -EINVAL; | ||
2167 | } | ||
2168 | old_disks = mddev->raid_disks - mddev->delta_disks; | ||
2169 | /* reshape_position must be on a new-stripe boundary, and one | ||
2170 | * further up in new geometry must map after here in old geometry. | ||
2171 | */ | ||
2172 | here_new = mddev->reshape_position; | ||
2173 | if (sector_div(here_new, (mddev->chunk_size>>9)*(mddev->raid_disks-1))) { | ||
2174 | printk(KERN_ERR "raid5: reshape_position not on a stripe boundary\n"); | ||
2175 | return -EINVAL; | ||
2176 | } | ||
2177 | /* here_new is the stripe we will write to */ | ||
2178 | here_old = mddev->reshape_position; | ||
2179 | sector_div(here_old, (mddev->chunk_size>>9)*(old_disks-1)); | ||
2180 | /* here_old is the first stripe that we might need to read from */ | ||
2181 | if (here_new >= here_old) { | ||
2182 | /* Reading from the same stripe as writing to - bad */ | ||
2183 | printk(KERN_ERR "raid5: reshape_position too early for auto-recovery - aborting.\n"); | ||
2184 | return -EINVAL; | ||
2185 | } | ||
2186 | printk(KERN_INFO "raid5: reshape will continue\n"); | ||
2187 | /* OK, we should be able to continue; */ | ||
2188 | } | ||
2189 | |||
2190 | |||
2124 | mddev->private = kzalloc(sizeof (raid5_conf_t), GFP_KERNEL); | 2191 | mddev->private = kzalloc(sizeof (raid5_conf_t), GFP_KERNEL); |
2125 | if ((conf = mddev->private) == NULL) | 2192 | if ((conf = mddev->private) == NULL) |
2126 | goto abort; | 2193 | goto abort; |
2127 | conf->disks = kzalloc(mddev->raid_disks * sizeof(struct disk_info), | 2194 | if (mddev->reshape_position == MaxSector) { |
2195 | conf->previous_raid_disks = conf->raid_disks = mddev->raid_disks; | ||
2196 | } else { | ||
2197 | conf->raid_disks = mddev->raid_disks; | ||
2198 | conf->previous_raid_disks = mddev->raid_disks - mddev->delta_disks; | ||
2199 | } | ||
2200 | |||
2201 | conf->disks = kzalloc(conf->raid_disks * sizeof(struct disk_info), | ||
2128 | GFP_KERNEL); | 2202 | GFP_KERNEL); |
2129 | if (!conf->disks) | 2203 | if (!conf->disks) |
2130 | goto abort; | 2204 | goto abort; |
@@ -2148,7 +2222,7 @@ static int run(mddev_t *mddev) | |||
2148 | 2222 | ||
2149 | ITERATE_RDEV(mddev,rdev,tmp) { | 2223 | ITERATE_RDEV(mddev,rdev,tmp) { |
2150 | raid_disk = rdev->raid_disk; | 2224 | raid_disk = rdev->raid_disk; |
2151 | if (raid_disk >= mddev->raid_disks | 2225 | if (raid_disk >= conf->raid_disks |
2152 | || raid_disk < 0) | 2226 | || raid_disk < 0) |
2153 | continue; | 2227 | continue; |
2154 | disk = conf->disks + raid_disk; | 2228 | disk = conf->disks + raid_disk; |
@@ -2164,7 +2238,6 @@ static int run(mddev_t *mddev) | |||
2164 | } | 2238 | } |
2165 | } | 2239 | } |
2166 | 2240 | ||
2167 | conf->raid_disks = mddev->raid_disks; | ||
2168 | /* | 2241 | /* |
2169 | * 0 for a fully functional array, 1 for a degraded array. | 2242 | * 0 for a fully functional array, 1 for a degraded array. |
2170 | */ | 2243 | */ |
@@ -2174,7 +2247,7 @@ static int run(mddev_t *mddev) | |||
2174 | conf->level = mddev->level; | 2247 | conf->level = mddev->level; |
2175 | conf->algorithm = mddev->layout; | 2248 | conf->algorithm = mddev->layout; |
2176 | conf->max_nr_stripes = NR_STRIPES; | 2249 | conf->max_nr_stripes = NR_STRIPES; |
2177 | conf->expand_progress = MaxSector; | 2250 | conf->expand_progress = mddev->reshape_position; |
2178 | 2251 | ||
2179 | /* device size must be a multiple of chunk size */ | 2252 | /* device size must be a multiple of chunk size */ |
2180 | mddev->size &= ~(mddev->chunk_size/1024 -1); | 2253 | mddev->size &= ~(mddev->chunk_size/1024 -1); |
@@ -2247,6 +2320,20 @@ static int run(mddev_t *mddev) | |||
2247 | 2320 | ||
2248 | print_raid5_conf(conf); | 2321 | print_raid5_conf(conf); |
2249 | 2322 | ||
2323 | if (conf->expand_progress != MaxSector) { | ||
2324 | printk("...ok start reshape thread\n"); | ||
2325 | atomic_set(&conf->reshape_stripes, 0); | ||
2326 | clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); | ||
2327 | clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); | ||
2328 | set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); | ||
2329 | set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); | ||
2330 | mddev->sync_thread = md_register_thread(md_do_sync, mddev, | ||
2331 | "%s_reshape"); | ||
2332 | /* FIXME if md_register_thread fails?? */ | ||
2333 | md_wakeup_thread(mddev->sync_thread); | ||
2334 | |||
2335 | } | ||
2336 | |||
2250 | /* read-ahead size must cover two whole stripes, which is | 2337 | /* read-ahead size must cover two whole stripes, which is |
2251 | * 2 * (n-1) * chunksize where 'n' is the number of raid devices | 2338 | * 2 * (n-1) * chunksize where 'n' is the number of raid devices |
2252 | */ | 2339 | */ |
@@ -2262,8 +2349,8 @@ static int run(mddev_t *mddev) | |||
2262 | 2349 | ||
2263 | mddev->queue->unplug_fn = raid5_unplug_device; | 2350 | mddev->queue->unplug_fn = raid5_unplug_device; |
2264 | mddev->queue->issue_flush_fn = raid5_issue_flush; | 2351 | mddev->queue->issue_flush_fn = raid5_issue_flush; |
2352 | mddev->array_size = mddev->size * (conf->previous_raid_disks - 1); | ||
2265 | 2353 | ||
2266 | mddev->array_size = mddev->size * (mddev->raid_disks - 1); | ||
2267 | return 0; | 2354 | return 0; |
2268 | abort: | 2355 | abort: |
2269 | if (conf) { | 2356 | if (conf) { |
@@ -2436,7 +2523,7 @@ static int raid5_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) | |||
2436 | /* | 2523 | /* |
2437 | * find the disk ... | 2524 | * find the disk ... |
2438 | */ | 2525 | */ |
2439 | for (disk=0; disk < mddev->raid_disks; disk++) | 2526 | for (disk=0; disk < conf->raid_disks; disk++) |
2440 | if ((p=conf->disks + disk)->rdev == NULL) { | 2527 | if ((p=conf->disks + disk)->rdev == NULL) { |
2441 | clear_bit(In_sync, &rdev->flags); | 2528 | clear_bit(In_sync, &rdev->flags); |
2442 | rdev->raid_disk = disk; | 2529 | rdev->raid_disk = disk; |
@@ -2518,9 +2605,10 @@ static int raid5_reshape(mddev_t *mddev, int raid_disks) | |||
2518 | if (err) | 2605 | if (err) |
2519 | return err; | 2606 | return err; |
2520 | 2607 | ||
2608 | atomic_set(&conf->reshape_stripes, 0); | ||
2521 | spin_lock_irq(&conf->device_lock); | 2609 | spin_lock_irq(&conf->device_lock); |
2522 | conf->previous_raid_disks = conf->raid_disks; | 2610 | conf->previous_raid_disks = conf->raid_disks; |
2523 | mddev->raid_disks = conf->raid_disks = raid_disks; | 2611 | conf->raid_disks = raid_disks; |
2524 | conf->expand_progress = 0; | 2612 | conf->expand_progress = 0; |
2525 | spin_unlock_irq(&conf->device_lock); | 2613 | spin_unlock_irq(&conf->device_lock); |
2526 | 2614 | ||
@@ -2542,6 +2630,14 @@ static int raid5_reshape(mddev_t *mddev, int raid_disks) | |||
2542 | } | 2630 | } |
2543 | 2631 | ||
2544 | mddev->degraded = (raid_disks - conf->previous_raid_disks) - added_devices; | 2632 | mddev->degraded = (raid_disks - conf->previous_raid_disks) - added_devices; |
2633 | mddev->new_chunk = mddev->chunk_size; | ||
2634 | mddev->new_layout = mddev->layout; | ||
2635 | mddev->new_level = mddev->level; | ||
2636 | mddev->raid_disks = raid_disks; | ||
2637 | mddev->delta_disks = raid_disks - conf->previous_raid_disks; | ||
2638 | mddev->reshape_position = 0; | ||
2639 | mddev->sb_dirty = 1; | ||
2640 | |||
2545 | clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); | 2641 | clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); |
2546 | clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); | 2642 | clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); |
2547 | set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); | 2643 | set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); |
@@ -2552,6 +2648,7 @@ static int raid5_reshape(mddev_t *mddev, int raid_disks) | |||
2552 | mddev->recovery = 0; | 2648 | mddev->recovery = 0; |
2553 | spin_lock_irq(&conf->device_lock); | 2649 | spin_lock_irq(&conf->device_lock); |
2554 | mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks; | 2650 | mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks; |
2651 | mddev->delta_disks = 0; | ||
2555 | conf->expand_progress = MaxSector; | 2652 | conf->expand_progress = MaxSector; |
2556 | spin_unlock_irq(&conf->device_lock); | 2653 | spin_unlock_irq(&conf->device_lock); |
2557 | return -EAGAIN; | 2654 | return -EAGAIN; |
@@ -2566,20 +2663,23 @@ static void end_reshape(raid5_conf_t *conf) | |||
2566 | { | 2663 | { |
2567 | struct block_device *bdev; | 2664 | struct block_device *bdev; |
2568 | 2665 | ||
2569 | conf->mddev->array_size = conf->mddev->size * (conf->mddev->raid_disks-1); | 2666 | if (!test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) { |
2570 | set_capacity(conf->mddev->gendisk, conf->mddev->array_size << 1); | 2667 | conf->mddev->array_size = conf->mddev->size * (conf->raid_disks-1); |
2571 | conf->mddev->changed = 1; | 2668 | set_capacity(conf->mddev->gendisk, conf->mddev->array_size << 1); |
2572 | 2669 | conf->mddev->changed = 1; | |
2573 | bdev = bdget_disk(conf->mddev->gendisk, 0); | 2670 | |
2574 | if (bdev) { | 2671 | bdev = bdget_disk(conf->mddev->gendisk, 0); |
2575 | mutex_lock(&bdev->bd_inode->i_mutex); | 2672 | if (bdev) { |
2576 | i_size_write(bdev->bd_inode, conf->mddev->array_size << 10); | 2673 | mutex_lock(&bdev->bd_inode->i_mutex); |
2577 | mutex_unlock(&bdev->bd_inode->i_mutex); | 2674 | i_size_write(bdev->bd_inode, conf->mddev->array_size << 10); |
2578 | bdput(bdev); | 2675 | mutex_unlock(&bdev->bd_inode->i_mutex); |
2676 | bdput(bdev); | ||
2677 | } | ||
2678 | spin_lock_irq(&conf->device_lock); | ||
2679 | conf->expand_progress = MaxSector; | ||
2680 | spin_unlock_irq(&conf->device_lock); | ||
2681 | conf->mddev->reshape_position = MaxSector; | ||
2579 | } | 2682 | } |
2580 | spin_lock_irq(&conf->device_lock); | ||
2581 | conf->expand_progress = MaxSector; | ||
2582 | spin_unlock_irq(&conf->device_lock); | ||
2583 | } | 2683 | } |
2584 | 2684 | ||
2585 | static void raid5_quiesce(mddev_t *mddev, int state) | 2685 | static void raid5_quiesce(mddev_t *mddev, int state) |