diff options
author | Trela, Maciej <Maciej.Trela@intel.com> | 2010-03-08 00:02:45 -0500 |
---|---|---|
committer | NeilBrown <neilb@suse.de> | 2010-05-18 01:27:48 -0400 |
commit | dab8b29248b3f14f456651a2a6ee9b8fd16d1b3c (patch) | |
tree | ced1c7cd74f2e0efc5a48819aa4711047960e2d5 | |
parent | 9af204cf720cedf369cf823bbd806c350201f7ea (diff) |
md: Add support for Raid0->Raid10 takeover
Signed-off-by: Maciej Trela <maciej.trela@intel.com>
Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r-- | drivers/md/raid10.c | 194 | ||||
-rw-r--r-- | drivers/md/raid10.h | 12 |
2 files changed, 155 insertions, 51 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 044c1157d98d..57d71d5d88f4 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <linux/seq_file.h> | 23 | #include <linux/seq_file.h> |
24 | #include "md.h" | 24 | #include "md.h" |
25 | #include "raid10.h" | 25 | #include "raid10.h" |
26 | #include "raid0.h" | ||
26 | #include "bitmap.h" | 27 | #include "bitmap.h" |
27 | 28 | ||
28 | /* | 29 | /* |
@@ -2141,7 +2142,7 @@ raid10_size(mddev_t *mddev, sector_t sectors, int raid_disks) | |||
2141 | if (!raid_disks) | 2142 | if (!raid_disks) |
2142 | raid_disks = conf->raid_disks; | 2143 | raid_disks = conf->raid_disks; |
2143 | if (!sectors) | 2144 | if (!sectors) |
2144 | sectors = mddev->dev_sectors; | 2145 | sectors = conf->dev_sectors; |
2145 | 2146 | ||
2146 | size = sectors >> conf->chunk_shift; | 2147 | size = sectors >> conf->chunk_shift; |
2147 | sector_div(size, conf->far_copies); | 2148 | sector_div(size, conf->far_copies); |
@@ -2151,62 +2152,60 @@ raid10_size(mddev_t *mddev, sector_t sectors, int raid_disks) | |||
2151 | return size << conf->chunk_shift; | 2152 | return size << conf->chunk_shift; |
2152 | } | 2153 | } |
2153 | 2154 | ||
2154 | static int run(mddev_t *mddev) | 2155 | |
2156 | static conf_t *setup_conf(mddev_t *mddev) | ||
2155 | { | 2157 | { |
2156 | conf_t *conf; | 2158 | conf_t *conf = NULL; |
2157 | int i, disk_idx, chunk_size; | ||
2158 | mirror_info_t *disk; | ||
2159 | mdk_rdev_t *rdev; | ||
2160 | int nc, fc, fo; | 2159 | int nc, fc, fo; |
2161 | sector_t stride, size; | 2160 | sector_t stride, size; |
2161 | int err = -EINVAL; | ||
2162 | 2162 | ||
2163 | if (mddev->chunk_sectors < (PAGE_SIZE >> 9) || | 2163 | if (mddev->chunk_sectors < (PAGE_SIZE >> 9) || |
2164 | !is_power_of_2(mddev->chunk_sectors)) { | 2164 | !is_power_of_2(mddev->chunk_sectors)) { |
2165 | printk(KERN_ERR "md/raid10: chunk size must be " | 2165 | printk(KERN_ERR "md/raid10: chunk size must be " |
2166 | "at least PAGE_SIZE(%ld) and be a power of 2.\n", PAGE_SIZE); | 2166 | "at least PAGE_SIZE(%ld) and be a power of 2.\n", PAGE_SIZE); |
2167 | return -EINVAL; | 2167 | goto out; |
2168 | } | 2168 | } |
2169 | 2169 | ||
2170 | nc = mddev->layout & 255; | 2170 | nc = mddev->layout & 255; |
2171 | fc = (mddev->layout >> 8) & 255; | 2171 | fc = (mddev->layout >> 8) & 255; |
2172 | fo = mddev->layout & (1<<16); | 2172 | fo = mddev->layout & (1<<16); |
2173 | |||
2173 | if ((nc*fc) <2 || (nc*fc) > mddev->raid_disks || | 2174 | if ((nc*fc) <2 || (nc*fc) > mddev->raid_disks || |
2174 | (mddev->layout >> 17)) { | 2175 | (mddev->layout >> 17)) { |
2175 | printk(KERN_ERR "raid10: %s: unsupported raid10 layout: 0x%8x\n", | 2176 | printk(KERN_ERR "raid10: %s: unsupported raid10 layout: 0x%8x\n", |
2176 | mdname(mddev), mddev->layout); | 2177 | mdname(mddev), mddev->layout); |
2177 | goto out; | 2178 | goto out; |
2178 | } | 2179 | } |
2179 | /* | 2180 | |
2180 | * copy the already verified devices into our private RAID10 | 2181 | err = -ENOMEM; |
2181 | * bookkeeping area. [whatever we allocate in run(), | ||
2182 | * should be freed in stop()] | ||
2183 | */ | ||
2184 | conf = kzalloc(sizeof(conf_t), GFP_KERNEL); | 2182 | conf = kzalloc(sizeof(conf_t), GFP_KERNEL); |
2185 | mddev->private = conf; | 2183 | if (!conf) |
2186 | if (!conf) { | ||
2187 | printk(KERN_ERR "raid10: couldn't allocate memory for %s\n", | ||
2188 | mdname(mddev)); | ||
2189 | goto out; | 2184 | goto out; |
2190 | } | 2185 | |
2191 | conf->mirrors = kzalloc(sizeof(struct mirror_info)*mddev->raid_disks, | 2186 | conf->mirrors = kzalloc(sizeof(struct mirror_info)*mddev->raid_disks, |
2192 | GFP_KERNEL); | 2187 | GFP_KERNEL); |
2193 | if (!conf->mirrors) { | 2188 | if (!conf->mirrors) |
2194 | printk(KERN_ERR "raid10: couldn't allocate memory for %s\n", | 2189 | goto out; |
2195 | mdname(mddev)); | ||
2196 | goto out_free_conf; | ||
2197 | } | ||
2198 | 2190 | ||
2199 | conf->tmppage = alloc_page(GFP_KERNEL); | 2191 | conf->tmppage = alloc_page(GFP_KERNEL); |
2200 | if (!conf->tmppage) | 2192 | if (!conf->tmppage) |
2201 | goto out_free_conf; | 2193 | goto out; |
2194 | |||
2202 | 2195 | ||
2203 | conf->raid_disks = mddev->raid_disks; | 2196 | conf->raid_disks = mddev->raid_disks; |
2204 | conf->near_copies = nc; | 2197 | conf->near_copies = nc; |
2205 | conf->far_copies = fc; | 2198 | conf->far_copies = fc; |
2206 | conf->copies = nc*fc; | 2199 | conf->copies = nc*fc; |
2207 | conf->far_offset = fo; | 2200 | conf->far_offset = fo; |
2208 | conf->chunk_mask = mddev->chunk_sectors - 1; | 2201 | conf->chunk_mask = mddev->new_chunk_sectors - 1; |
2209 | conf->chunk_shift = ffz(~mddev->chunk_sectors); | 2202 | conf->chunk_shift = ffz(~mddev->new_chunk_sectors); |
2203 | |||
2204 | conf->r10bio_pool = mempool_create(NR_RAID10_BIOS, r10bio_pool_alloc, | ||
2205 | r10bio_pool_free, conf); | ||
2206 | if (!conf->r10bio_pool) | ||
2207 | goto out; | ||
2208 | |||
2210 | size = mddev->dev_sectors >> conf->chunk_shift; | 2209 | size = mddev->dev_sectors >> conf->chunk_shift; |
2211 | sector_div(size, fc); | 2210 | sector_div(size, fc); |
2212 | size = size * conf->raid_disks; | 2211 | size = size * conf->raid_disks; |
@@ -2220,7 +2219,8 @@ static int run(mddev_t *mddev) | |||
2220 | */ | 2219 | */ |
2221 | stride += conf->raid_disks - 1; | 2220 | stride += conf->raid_disks - 1; |
2222 | sector_div(stride, conf->raid_disks); | 2221 | sector_div(stride, conf->raid_disks); |
2223 | mddev->dev_sectors = stride << conf->chunk_shift; | 2222 | |
2223 | conf->dev_sectors = stride << conf->chunk_shift; | ||
2224 | 2224 | ||
2225 | if (fo) | 2225 | if (fo) |
2226 | stride = 1; | 2226 | stride = 1; |
@@ -2228,18 +2228,63 @@ static int run(mddev_t *mddev) | |||
2228 | sector_div(stride, fc); | 2228 | sector_div(stride, fc); |
2229 | conf->stride = stride << conf->chunk_shift; | 2229 | conf->stride = stride << conf->chunk_shift; |
2230 | 2230 | ||
2231 | conf->r10bio_pool = mempool_create(NR_RAID10_BIOS, r10bio_pool_alloc, | ||
2232 | r10bio_pool_free, conf); | ||
2233 | if (!conf->r10bio_pool) { | ||
2234 | printk(KERN_ERR "raid10: couldn't allocate memory for %s\n", | ||
2235 | mdname(mddev)); | ||
2236 | goto out_free_conf; | ||
2237 | } | ||
2238 | 2231 | ||
2239 | conf->mddev = mddev; | ||
2240 | spin_lock_init(&conf->device_lock); | 2232 | spin_lock_init(&conf->device_lock); |
2233 | INIT_LIST_HEAD(&conf->retry_list); | ||
2234 | |||
2235 | spin_lock_init(&conf->resync_lock); | ||
2236 | init_waitqueue_head(&conf->wait_barrier); | ||
2237 | |||
2238 | conf->thread = md_register_thread(raid10d, mddev, NULL); | ||
2239 | if (!conf->thread) | ||
2240 | goto out; | ||
2241 | |||
2242 | conf->scale_disks = 0; | ||
2243 | conf->mddev = mddev; | ||
2244 | return conf; | ||
2245 | |||
2246 | out: | ||
2247 | printk(KERN_ERR "raid10: couldn't allocate memory for %s\n", | ||
2248 | mdname(mddev)); | ||
2249 | if (conf) { | ||
2250 | if (conf->r10bio_pool) | ||
2251 | mempool_destroy(conf->r10bio_pool); | ||
2252 | kfree(conf->mirrors); | ||
2253 | safe_put_page(conf->tmppage); | ||
2254 | kfree(conf); | ||
2255 | } | ||
2256 | return ERR_PTR(err); | ||
2257 | } | ||
2258 | |||
2259 | static int run(mddev_t *mddev) | ||
2260 | { | ||
2261 | conf_t *conf; | ||
2262 | int i, disk_idx, chunk_size; | ||
2263 | mirror_info_t *disk; | ||
2264 | mdk_rdev_t *rdev; | ||
2265 | sector_t size; | ||
2266 | |||
2267 | /* | ||
2268 | * copy the already verified devices into our private RAID10 | ||
2269 | * bookkeeping area. [whatever we allocate in run(), | ||
2270 | * should be freed in stop()] | ||
2271 | */ | ||
2272 | |||
2273 | if (mddev->private == NULL) { | ||
2274 | conf = setup_conf(mddev); | ||
2275 | if (IS_ERR(conf)) | ||
2276 | return PTR_ERR(conf); | ||
2277 | mddev->private = conf; | ||
2278 | } | ||
2279 | conf = mddev->private; | ||
2280 | if (!conf) | ||
2281 | goto out; | ||
2282 | |||
2241 | mddev->queue->queue_lock = &conf->device_lock; | 2283 | mddev->queue->queue_lock = &conf->device_lock; |
2242 | 2284 | ||
2285 | mddev->thread = conf->thread; | ||
2286 | conf->thread = NULL; | ||
2287 | |||
2243 | chunk_size = mddev->chunk_sectors << 9; | 2288 | chunk_size = mddev->chunk_sectors << 9; |
2244 | blk_queue_io_min(mddev->queue, chunk_size); | 2289 | blk_queue_io_min(mddev->queue, chunk_size); |
2245 | if (conf->raid_disks % conf->near_copies) | 2290 | if (conf->raid_disks % conf->near_copies) |
@@ -2253,6 +2298,11 @@ static int run(mddev_t *mddev) | |||
2253 | if (disk_idx >= conf->raid_disks | 2298 | if (disk_idx >= conf->raid_disks |
2254 | || disk_idx < 0) | 2299 | || disk_idx < 0) |
2255 | continue; | 2300 | continue; |
2301 | if (conf->scale_disks) { | ||
2302 | disk_idx *= conf->scale_disks; | ||
2303 | rdev->raid_disk = disk_idx; | ||
2304 | /* MOVE 'rd%d' link !! */ | ||
2305 | } | ||
2256 | disk = conf->mirrors + disk_idx; | 2306 | disk = conf->mirrors + disk_idx; |
2257 | 2307 | ||
2258 | disk->rdev = rdev; | 2308 | disk->rdev = rdev; |
@@ -2270,11 +2320,6 @@ static int run(mddev_t *mddev) | |||
2270 | 2320 | ||
2271 | disk->head_position = 0; | 2321 | disk->head_position = 0; |
2272 | } | 2322 | } |
2273 | INIT_LIST_HEAD(&conf->retry_list); | ||
2274 | |||
2275 | spin_lock_init(&conf->resync_lock); | ||
2276 | init_waitqueue_head(&conf->wait_barrier); | ||
2277 | |||
2278 | /* need to check that every block has at least one working mirror */ | 2323 | /* need to check that every block has at least one working mirror */ |
2279 | if (!enough(conf)) { | 2324 | if (!enough(conf)) { |
2280 | printk(KERN_ERR "raid10: not enough operational mirrors for %s\n", | 2325 | printk(KERN_ERR "raid10: not enough operational mirrors for %s\n", |
@@ -2296,15 +2341,6 @@ static int run(mddev_t *mddev) | |||
2296 | } | 2341 | } |
2297 | } | 2342 | } |
2298 | 2343 | ||
2299 | |||
2300 | mddev->thread = md_register_thread(raid10d, mddev, NULL); | ||
2301 | if (!mddev->thread) { | ||
2302 | printk(KERN_ERR | ||
2303 | "raid10: couldn't allocate thread for %s\n", | ||
2304 | mdname(mddev)); | ||
2305 | goto out_free_conf; | ||
2306 | } | ||
2307 | |||
2308 | if (mddev->recovery_cp != MaxSector) | 2344 | if (mddev->recovery_cp != MaxSector) |
2309 | printk(KERN_NOTICE "raid10: %s is not clean" | 2345 | printk(KERN_NOTICE "raid10: %s is not clean" |
2310 | " -- starting background reconstruction\n", | 2346 | " -- starting background reconstruction\n", |
@@ -2316,8 +2352,10 @@ static int run(mddev_t *mddev) | |||
2316 | /* | 2352 | /* |
2317 | * Ok, everything is just fine now | 2353 | * Ok, everything is just fine now |
2318 | */ | 2354 | */ |
2319 | md_set_array_sectors(mddev, raid10_size(mddev, 0, 0)); | 2355 | mddev->dev_sectors = conf->dev_sectors; |
2320 | mddev->resync_max_sectors = raid10_size(mddev, 0, 0); | 2356 | size = raid10_size(mddev, 0, 0); |
2357 | md_set_array_sectors(mddev, size); | ||
2358 | mddev->resync_max_sectors = size; | ||
2321 | 2359 | ||
2322 | mddev->queue->unplug_fn = raid10_unplug; | 2360 | mddev->queue->unplug_fn = raid10_unplug; |
2323 | mddev->queue->backing_dev_info.congested_fn = raid10_congested; | 2361 | mddev->queue->backing_dev_info.congested_fn = raid10_congested; |
@@ -2347,6 +2385,7 @@ out_free_conf: | |||
2347 | kfree(conf->mirrors); | 2385 | kfree(conf->mirrors); |
2348 | kfree(conf); | 2386 | kfree(conf); |
2349 | mddev->private = NULL; | 2387 | mddev->private = NULL; |
2388 | md_unregister_thread(mddev->thread); | ||
2350 | out: | 2389 | out: |
2351 | return -EIO; | 2390 | return -EIO; |
2352 | } | 2391 | } |
@@ -2383,6 +2422,58 @@ static void raid10_quiesce(mddev_t *mddev, int state) | |||
2383 | } | 2422 | } |
2384 | } | 2423 | } |
2385 | 2424 | ||
2425 | static void *raid10_takeover_raid0(mddev_t *mddev) | ||
2426 | { | ||
2427 | mdk_rdev_t *rdev; | ||
2428 | conf_t *conf; | ||
2429 | |||
2430 | if (mddev->degraded > 0) { | ||
2431 | printk(KERN_ERR "error: degraded raid0!\n"); | ||
2432 | return ERR_PTR(-EINVAL); | ||
2433 | } | ||
2434 | |||
2435 | /* Update slot numbers to obtain | ||
2436 | * degraded raid10 with missing mirrors | ||
2437 | */ | ||
2438 | list_for_each_entry(rdev, &mddev->disks, same_set) { | ||
2439 | rdev->raid_disk *= 2; | ||
2440 | } | ||
2441 | |||
2442 | /* Set new parameters */ | ||
2443 | mddev->new_level = 10; | ||
2444 | /* new layout: far_copies = 1, near_copies = 2 */ | ||
2445 | mddev->new_layout = (1<<8) + 2; | ||
2446 | mddev->new_chunk_sectors = mddev->chunk_sectors; | ||
2447 | mddev->delta_disks = mddev->raid_disks; | ||
2448 | mddev->degraded = mddev->raid_disks; | ||
2449 | mddev->raid_disks *= 2; | ||
2450 | /* make sure it will be not marked as dirty */ | ||
2451 | mddev->recovery_cp = MaxSector; | ||
2452 | |||
2453 | conf = setup_conf(mddev); | ||
2454 | conf->scale_disks = 2; | ||
2455 | return conf; | ||
2456 | } | ||
2457 | |||
2458 | static void *raid10_takeover(mddev_t *mddev) | ||
2459 | { | ||
2460 | struct raid0_private_data *raid0_priv; | ||
2461 | |||
2462 | /* raid10 can take over: | ||
2463 | * raid0 - providing it has only two drives | ||
2464 | */ | ||
2465 | if (mddev->level == 0) { | ||
2466 | /* for raid0 takeover only one zone is supported */ | ||
2467 | raid0_priv = mddev->private; | ||
2468 | if (raid0_priv->nr_strip_zones > 1) { | ||
2469 | printk(KERN_ERR "md: cannot takeover raid 0 with more than one zone.\n"); | ||
2470 | return ERR_PTR(-EINVAL); | ||
2471 | } | ||
2472 | return raid10_takeover_raid0(mddev); | ||
2473 | } | ||
2474 | return ERR_PTR(-EINVAL); | ||
2475 | } | ||
2476 | |||
2386 | static struct mdk_personality raid10_personality = | 2477 | static struct mdk_personality raid10_personality = |
2387 | { | 2478 | { |
2388 | .name = "raid10", | 2479 | .name = "raid10", |
@@ -2399,6 +2490,7 @@ static struct mdk_personality raid10_personality = | |||
2399 | .sync_request = sync_request, | 2490 | .sync_request = sync_request, |
2400 | .quiesce = raid10_quiesce, | 2491 | .quiesce = raid10_quiesce, |
2401 | .size = raid10_size, | 2492 | .size = raid10_size, |
2493 | .takeover = raid10_takeover, | ||
2402 | }; | 2494 | }; |
2403 | 2495 | ||
2404 | static int __init raid_init(void) | 2496 | static int __init raid_init(void) |
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h index 59cd1efb8d30..3824a087e17c 100644 --- a/drivers/md/raid10.h +++ b/drivers/md/raid10.h | |||
@@ -33,9 +33,16 @@ struct r10_private_data_s { | |||
33 | * 1 stripe. | 33 | * 1 stripe. |
34 | */ | 34 | */ |
35 | 35 | ||
36 | sector_t dev_sectors; /* temp copy of mddev->dev_sectors */ | ||
37 | |||
36 | int chunk_shift; /* shift from chunks to sectors */ | 38 | int chunk_shift; /* shift from chunks to sectors */ |
37 | sector_t chunk_mask; | 39 | sector_t chunk_mask; |
38 | 40 | ||
41 | int scale_disks; /* When starting array, multiply | ||
42 | * each ->raid_disk by this. | ||
43 | * Need for raid0->raid10 migration | ||
44 | */ | ||
45 | |||
39 | struct list_head retry_list; | 46 | struct list_head retry_list; |
40 | /* queue pending writes and submit them on unplug */ | 47 | /* queue pending writes and submit them on unplug */ |
41 | struct bio_list pending_bio_list; | 48 | struct bio_list pending_bio_list; |
@@ -57,6 +64,11 @@ struct r10_private_data_s { | |||
57 | mempool_t *r10bio_pool; | 64 | mempool_t *r10bio_pool; |
58 | mempool_t *r10buf_pool; | 65 | mempool_t *r10buf_pool; |
59 | struct page *tmppage; | 66 | struct page *tmppage; |
67 | |||
68 | /* When taking over an array from a different personality, we store | ||
69 | * the new thread here until we fully activate the array. | ||
70 | */ | ||
71 | struct mdk_thread_s *thread; | ||
60 | }; | 72 | }; |
61 | 73 | ||
62 | typedef struct r10_private_data_s conf_t; | 74 | typedef struct r10_private_data_s conf_t; |