diff options
Diffstat (limited to 'drivers')
| -rw-r--r-- | drivers/md/raid10.c | 194 | ||||
| -rw-r--r-- | drivers/md/raid10.h | 12 |
2 files changed, 155 insertions, 51 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 044c1157d98d..57d71d5d88f4 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
| @@ -23,6 +23,7 @@ | |||
| 23 | #include <linux/seq_file.h> | 23 | #include <linux/seq_file.h> |
| 24 | #include "md.h" | 24 | #include "md.h" |
| 25 | #include "raid10.h" | 25 | #include "raid10.h" |
| 26 | #include "raid0.h" | ||
| 26 | #include "bitmap.h" | 27 | #include "bitmap.h" |
| 27 | 28 | ||
| 28 | /* | 29 | /* |
| @@ -2141,7 +2142,7 @@ raid10_size(mddev_t *mddev, sector_t sectors, int raid_disks) | |||
| 2141 | if (!raid_disks) | 2142 | if (!raid_disks) |
| 2142 | raid_disks = conf->raid_disks; | 2143 | raid_disks = conf->raid_disks; |
| 2143 | if (!sectors) | 2144 | if (!sectors) |
| 2144 | sectors = mddev->dev_sectors; | 2145 | sectors = conf->dev_sectors; |
| 2145 | 2146 | ||
| 2146 | size = sectors >> conf->chunk_shift; | 2147 | size = sectors >> conf->chunk_shift; |
| 2147 | sector_div(size, conf->far_copies); | 2148 | sector_div(size, conf->far_copies); |
| @@ -2151,62 +2152,60 @@ raid10_size(mddev_t *mddev, sector_t sectors, int raid_disks) | |||
| 2151 | return size << conf->chunk_shift; | 2152 | return size << conf->chunk_shift; |
| 2152 | } | 2153 | } |
| 2153 | 2154 | ||
| 2154 | static int run(mddev_t *mddev) | 2155 | |
| 2156 | static conf_t *setup_conf(mddev_t *mddev) | ||
| 2155 | { | 2157 | { |
| 2156 | conf_t *conf; | 2158 | conf_t *conf = NULL; |
| 2157 | int i, disk_idx, chunk_size; | ||
| 2158 | mirror_info_t *disk; | ||
| 2159 | mdk_rdev_t *rdev; | ||
| 2160 | int nc, fc, fo; | 2159 | int nc, fc, fo; |
| 2161 | sector_t stride, size; | 2160 | sector_t stride, size; |
| 2161 | int err = -EINVAL; | ||
| 2162 | 2162 | ||
| 2163 | if (mddev->chunk_sectors < (PAGE_SIZE >> 9) || | 2163 | if (mddev->chunk_sectors < (PAGE_SIZE >> 9) || |
| 2164 | !is_power_of_2(mddev->chunk_sectors)) { | 2164 | !is_power_of_2(mddev->chunk_sectors)) { |
| 2165 | printk(KERN_ERR "md/raid10: chunk size must be " | 2165 | printk(KERN_ERR "md/raid10: chunk size must be " |
| 2166 | "at least PAGE_SIZE(%ld) and be a power of 2.\n", PAGE_SIZE); | 2166 | "at least PAGE_SIZE(%ld) and be a power of 2.\n", PAGE_SIZE); |
| 2167 | return -EINVAL; | 2167 | goto out; |
| 2168 | } | 2168 | } |
| 2169 | 2169 | ||
| 2170 | nc = mddev->layout & 255; | 2170 | nc = mddev->layout & 255; |
| 2171 | fc = (mddev->layout >> 8) & 255; | 2171 | fc = (mddev->layout >> 8) & 255; |
| 2172 | fo = mddev->layout & (1<<16); | 2172 | fo = mddev->layout & (1<<16); |
| 2173 | |||
| 2173 | if ((nc*fc) <2 || (nc*fc) > mddev->raid_disks || | 2174 | if ((nc*fc) <2 || (nc*fc) > mddev->raid_disks || |
| 2174 | (mddev->layout >> 17)) { | 2175 | (mddev->layout >> 17)) { |
| 2175 | printk(KERN_ERR "raid10: %s: unsupported raid10 layout: 0x%8x\n", | 2176 | printk(KERN_ERR "raid10: %s: unsupported raid10 layout: 0x%8x\n", |
| 2176 | mdname(mddev), mddev->layout); | 2177 | mdname(mddev), mddev->layout); |
| 2177 | goto out; | 2178 | goto out; |
| 2178 | } | 2179 | } |
| 2179 | /* | 2180 | |
| 2180 | * copy the already verified devices into our private RAID10 | 2181 | err = -ENOMEM; |
| 2181 | * bookkeeping area. [whatever we allocate in run(), | ||
| 2182 | * should be freed in stop()] | ||
| 2183 | */ | ||
| 2184 | conf = kzalloc(sizeof(conf_t), GFP_KERNEL); | 2182 | conf = kzalloc(sizeof(conf_t), GFP_KERNEL); |
| 2185 | mddev->private = conf; | 2183 | if (!conf) |
| 2186 | if (!conf) { | ||
| 2187 | printk(KERN_ERR "raid10: couldn't allocate memory for %s\n", | ||
| 2188 | mdname(mddev)); | ||
| 2189 | goto out; | 2184 | goto out; |
| 2190 | } | 2185 | |
| 2191 | conf->mirrors = kzalloc(sizeof(struct mirror_info)*mddev->raid_disks, | 2186 | conf->mirrors = kzalloc(sizeof(struct mirror_info)*mddev->raid_disks, |
| 2192 | GFP_KERNEL); | 2187 | GFP_KERNEL); |
| 2193 | if (!conf->mirrors) { | 2188 | if (!conf->mirrors) |
| 2194 | printk(KERN_ERR "raid10: couldn't allocate memory for %s\n", | 2189 | goto out; |
| 2195 | mdname(mddev)); | ||
| 2196 | goto out_free_conf; | ||
| 2197 | } | ||
| 2198 | 2190 | ||
| 2199 | conf->tmppage = alloc_page(GFP_KERNEL); | 2191 | conf->tmppage = alloc_page(GFP_KERNEL); |
| 2200 | if (!conf->tmppage) | 2192 | if (!conf->tmppage) |
| 2201 | goto out_free_conf; | 2193 | goto out; |
| 2194 | |||
| 2202 | 2195 | ||
| 2203 | conf->raid_disks = mddev->raid_disks; | 2196 | conf->raid_disks = mddev->raid_disks; |
| 2204 | conf->near_copies = nc; | 2197 | conf->near_copies = nc; |
| 2205 | conf->far_copies = fc; | 2198 | conf->far_copies = fc; |
| 2206 | conf->copies = nc*fc; | 2199 | conf->copies = nc*fc; |
| 2207 | conf->far_offset = fo; | 2200 | conf->far_offset = fo; |
| 2208 | conf->chunk_mask = mddev->chunk_sectors - 1; | 2201 | conf->chunk_mask = mddev->new_chunk_sectors - 1; |
| 2209 | conf->chunk_shift = ffz(~mddev->chunk_sectors); | 2202 | conf->chunk_shift = ffz(~mddev->new_chunk_sectors); |
| 2203 | |||
| 2204 | conf->r10bio_pool = mempool_create(NR_RAID10_BIOS, r10bio_pool_alloc, | ||
| 2205 | r10bio_pool_free, conf); | ||
| 2206 | if (!conf->r10bio_pool) | ||
| 2207 | goto out; | ||
| 2208 | |||
| 2210 | size = mddev->dev_sectors >> conf->chunk_shift; | 2209 | size = mddev->dev_sectors >> conf->chunk_shift; |
| 2211 | sector_div(size, fc); | 2210 | sector_div(size, fc); |
| 2212 | size = size * conf->raid_disks; | 2211 | size = size * conf->raid_disks; |
| @@ -2220,7 +2219,8 @@ static int run(mddev_t *mddev) | |||
| 2220 | */ | 2219 | */ |
| 2221 | stride += conf->raid_disks - 1; | 2220 | stride += conf->raid_disks - 1; |
| 2222 | sector_div(stride, conf->raid_disks); | 2221 | sector_div(stride, conf->raid_disks); |
| 2223 | mddev->dev_sectors = stride << conf->chunk_shift; | 2222 | |
| 2223 | conf->dev_sectors = stride << conf->chunk_shift; | ||
| 2224 | 2224 | ||
| 2225 | if (fo) | 2225 | if (fo) |
| 2226 | stride = 1; | 2226 | stride = 1; |
| @@ -2228,18 +2228,63 @@ static int run(mddev_t *mddev) | |||
| 2228 | sector_div(stride, fc); | 2228 | sector_div(stride, fc); |
| 2229 | conf->stride = stride << conf->chunk_shift; | 2229 | conf->stride = stride << conf->chunk_shift; |
| 2230 | 2230 | ||
| 2231 | conf->r10bio_pool = mempool_create(NR_RAID10_BIOS, r10bio_pool_alloc, | ||
| 2232 | r10bio_pool_free, conf); | ||
| 2233 | if (!conf->r10bio_pool) { | ||
| 2234 | printk(KERN_ERR "raid10: couldn't allocate memory for %s\n", | ||
| 2235 | mdname(mddev)); | ||
| 2236 | goto out_free_conf; | ||
| 2237 | } | ||
| 2238 | 2231 | ||
| 2239 | conf->mddev = mddev; | ||
| 2240 | spin_lock_init(&conf->device_lock); | 2232 | spin_lock_init(&conf->device_lock); |
| 2233 | INIT_LIST_HEAD(&conf->retry_list); | ||
| 2234 | |||
| 2235 | spin_lock_init(&conf->resync_lock); | ||
| 2236 | init_waitqueue_head(&conf->wait_barrier); | ||
| 2237 | |||
| 2238 | conf->thread = md_register_thread(raid10d, mddev, NULL); | ||
| 2239 | if (!conf->thread) | ||
| 2240 | goto out; | ||
| 2241 | |||
| 2242 | conf->scale_disks = 0; | ||
| 2243 | conf->mddev = mddev; | ||
| 2244 | return conf; | ||
| 2245 | |||
| 2246 | out: | ||
| 2247 | printk(KERN_ERR "raid10: couldn't allocate memory for %s\n", | ||
| 2248 | mdname(mddev)); | ||
| 2249 | if (conf) { | ||
| 2250 | if (conf->r10bio_pool) | ||
| 2251 | mempool_destroy(conf->r10bio_pool); | ||
| 2252 | kfree(conf->mirrors); | ||
| 2253 | safe_put_page(conf->tmppage); | ||
| 2254 | kfree(conf); | ||
| 2255 | } | ||
| 2256 | return ERR_PTR(err); | ||
| 2257 | } | ||
| 2258 | |||
| 2259 | static int run(mddev_t *mddev) | ||
| 2260 | { | ||
| 2261 | conf_t *conf; | ||
| 2262 | int i, disk_idx, chunk_size; | ||
| 2263 | mirror_info_t *disk; | ||
| 2264 | mdk_rdev_t *rdev; | ||
| 2265 | sector_t size; | ||
| 2266 | |||
| 2267 | /* | ||
| 2268 | * copy the already verified devices into our private RAID10 | ||
| 2269 | * bookkeeping area. [whatever we allocate in run(), | ||
| 2270 | * should be freed in stop()] | ||
| 2271 | */ | ||
| 2272 | |||
| 2273 | if (mddev->private == NULL) { | ||
| 2274 | conf = setup_conf(mddev); | ||
| 2275 | if (IS_ERR(conf)) | ||
| 2276 | return PTR_ERR(conf); | ||
| 2277 | mddev->private = conf; | ||
| 2278 | } | ||
| 2279 | conf = mddev->private; | ||
| 2280 | if (!conf) | ||
| 2281 | goto out; | ||
| 2282 | |||
| 2241 | mddev->queue->queue_lock = &conf->device_lock; | 2283 | mddev->queue->queue_lock = &conf->device_lock; |
| 2242 | 2284 | ||
| 2285 | mddev->thread = conf->thread; | ||
| 2286 | conf->thread = NULL; | ||
| 2287 | |||
| 2243 | chunk_size = mddev->chunk_sectors << 9; | 2288 | chunk_size = mddev->chunk_sectors << 9; |
| 2244 | blk_queue_io_min(mddev->queue, chunk_size); | 2289 | blk_queue_io_min(mddev->queue, chunk_size); |
| 2245 | if (conf->raid_disks % conf->near_copies) | 2290 | if (conf->raid_disks % conf->near_copies) |
| @@ -2253,6 +2298,11 @@ static int run(mddev_t *mddev) | |||
| 2253 | if (disk_idx >= conf->raid_disks | 2298 | if (disk_idx >= conf->raid_disks |
| 2254 | || disk_idx < 0) | 2299 | || disk_idx < 0) |
| 2255 | continue; | 2300 | continue; |
| 2301 | if (conf->scale_disks) { | ||
| 2302 | disk_idx *= conf->scale_disks; | ||
| 2303 | rdev->raid_disk = disk_idx; | ||
| 2304 | /* MOVE 'rd%d' link !! */ | ||
| 2305 | } | ||
| 2256 | disk = conf->mirrors + disk_idx; | 2306 | disk = conf->mirrors + disk_idx; |
| 2257 | 2307 | ||
| 2258 | disk->rdev = rdev; | 2308 | disk->rdev = rdev; |
| @@ -2270,11 +2320,6 @@ static int run(mddev_t *mddev) | |||
| 2270 | 2320 | ||
| 2271 | disk->head_position = 0; | 2321 | disk->head_position = 0; |
| 2272 | } | 2322 | } |
| 2273 | INIT_LIST_HEAD(&conf->retry_list); | ||
| 2274 | |||
| 2275 | spin_lock_init(&conf->resync_lock); | ||
| 2276 | init_waitqueue_head(&conf->wait_barrier); | ||
| 2277 | |||
| 2278 | /* need to check that every block has at least one working mirror */ | 2323 | /* need to check that every block has at least one working mirror */ |
| 2279 | if (!enough(conf)) { | 2324 | if (!enough(conf)) { |
| 2280 | printk(KERN_ERR "raid10: not enough operational mirrors for %s\n", | 2325 | printk(KERN_ERR "raid10: not enough operational mirrors for %s\n", |
| @@ -2296,15 +2341,6 @@ static int run(mddev_t *mddev) | |||
| 2296 | } | 2341 | } |
| 2297 | } | 2342 | } |
| 2298 | 2343 | ||
| 2299 | |||
| 2300 | mddev->thread = md_register_thread(raid10d, mddev, NULL); | ||
| 2301 | if (!mddev->thread) { | ||
| 2302 | printk(KERN_ERR | ||
| 2303 | "raid10: couldn't allocate thread for %s\n", | ||
| 2304 | mdname(mddev)); | ||
| 2305 | goto out_free_conf; | ||
| 2306 | } | ||
| 2307 | |||
| 2308 | if (mddev->recovery_cp != MaxSector) | 2344 | if (mddev->recovery_cp != MaxSector) |
| 2309 | printk(KERN_NOTICE "raid10: %s is not clean" | 2345 | printk(KERN_NOTICE "raid10: %s is not clean" |
| 2310 | " -- starting background reconstruction\n", | 2346 | " -- starting background reconstruction\n", |
| @@ -2316,8 +2352,10 @@ static int run(mddev_t *mddev) | |||
| 2316 | /* | 2352 | /* |
| 2317 | * Ok, everything is just fine now | 2353 | * Ok, everything is just fine now |
| 2318 | */ | 2354 | */ |
| 2319 | md_set_array_sectors(mddev, raid10_size(mddev, 0, 0)); | 2355 | mddev->dev_sectors = conf->dev_sectors; |
| 2320 | mddev->resync_max_sectors = raid10_size(mddev, 0, 0); | 2356 | size = raid10_size(mddev, 0, 0); |
| 2357 | md_set_array_sectors(mddev, size); | ||
| 2358 | mddev->resync_max_sectors = size; | ||
| 2321 | 2359 | ||
| 2322 | mddev->queue->unplug_fn = raid10_unplug; | 2360 | mddev->queue->unplug_fn = raid10_unplug; |
| 2323 | mddev->queue->backing_dev_info.congested_fn = raid10_congested; | 2361 | mddev->queue->backing_dev_info.congested_fn = raid10_congested; |
| @@ -2347,6 +2385,7 @@ out_free_conf: | |||
| 2347 | kfree(conf->mirrors); | 2385 | kfree(conf->mirrors); |
| 2348 | kfree(conf); | 2386 | kfree(conf); |
| 2349 | mddev->private = NULL; | 2387 | mddev->private = NULL; |
| 2388 | md_unregister_thread(mddev->thread); | ||
| 2350 | out: | 2389 | out: |
| 2351 | return -EIO; | 2390 | return -EIO; |
| 2352 | } | 2391 | } |
| @@ -2383,6 +2422,58 @@ static void raid10_quiesce(mddev_t *mddev, int state) | |||
| 2383 | } | 2422 | } |
| 2384 | } | 2423 | } |
| 2385 | 2424 | ||
| 2425 | static void *raid10_takeover_raid0(mddev_t *mddev) | ||
| 2426 | { | ||
| 2427 | mdk_rdev_t *rdev; | ||
| 2428 | conf_t *conf; | ||
| 2429 | |||
| 2430 | if (mddev->degraded > 0) { | ||
| 2431 | printk(KERN_ERR "error: degraded raid0!\n"); | ||
| 2432 | return ERR_PTR(-EINVAL); | ||
| 2433 | } | ||
| 2434 | |||
| 2435 | /* Update slot numbers to obtain | ||
| 2436 | * degraded raid10 with missing mirrors | ||
| 2437 | */ | ||
| 2438 | list_for_each_entry(rdev, &mddev->disks, same_set) { | ||
| 2439 | rdev->raid_disk *= 2; | ||
| 2440 | } | ||
| 2441 | |||
| 2442 | /* Set new parameters */ | ||
| 2443 | mddev->new_level = 10; | ||
| 2444 | /* new layout: far_copies = 1, near_copies = 2 */ | ||
| 2445 | mddev->new_layout = (1<<8) + 2; | ||
| 2446 | mddev->new_chunk_sectors = mddev->chunk_sectors; | ||
| 2447 | mddev->delta_disks = mddev->raid_disks; | ||
| 2448 | mddev->degraded = mddev->raid_disks; | ||
| 2449 | mddev->raid_disks *= 2; | ||
| 2450 | /* make sure it will be not marked as dirty */ | ||
| 2451 | mddev->recovery_cp = MaxSector; | ||
| 2452 | |||
| 2453 | conf = setup_conf(mddev); | ||
| 2454 | conf->scale_disks = 2; | ||
| 2455 | return conf; | ||
| 2456 | } | ||
| 2457 | |||
| 2458 | static void *raid10_takeover(mddev_t *mddev) | ||
| 2459 | { | ||
| 2460 | struct raid0_private_data *raid0_priv; | ||
| 2461 | |||
| 2462 | /* raid10 can take over: | ||
| 2463 | * raid0 - providing it has only two drives | ||
| 2464 | */ | ||
| 2465 | if (mddev->level == 0) { | ||
| 2466 | /* for raid0 takeover only one zone is supported */ | ||
| 2467 | raid0_priv = mddev->private; | ||
| 2468 | if (raid0_priv->nr_strip_zones > 1) { | ||
| 2469 | printk(KERN_ERR "md: cannot takeover raid 0 with more than one zone.\n"); | ||
| 2470 | return ERR_PTR(-EINVAL); | ||
| 2471 | } | ||
| 2472 | return raid10_takeover_raid0(mddev); | ||
| 2473 | } | ||
| 2474 | return ERR_PTR(-EINVAL); | ||
| 2475 | } | ||
| 2476 | |||
| 2386 | static struct mdk_personality raid10_personality = | 2477 | static struct mdk_personality raid10_personality = |
| 2387 | { | 2478 | { |
| 2388 | .name = "raid10", | 2479 | .name = "raid10", |
| @@ -2399,6 +2490,7 @@ static struct mdk_personality raid10_personality = | |||
| 2399 | .sync_request = sync_request, | 2490 | .sync_request = sync_request, |
| 2400 | .quiesce = raid10_quiesce, | 2491 | .quiesce = raid10_quiesce, |
| 2401 | .size = raid10_size, | 2492 | .size = raid10_size, |
| 2493 | .takeover = raid10_takeover, | ||
| 2402 | }; | 2494 | }; |
| 2403 | 2495 | ||
| 2404 | static int __init raid_init(void) | 2496 | static int __init raid_init(void) |
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h index 59cd1efb8d30..3824a087e17c 100644 --- a/drivers/md/raid10.h +++ b/drivers/md/raid10.h | |||
| @@ -33,9 +33,16 @@ struct r10_private_data_s { | |||
| 33 | * 1 stripe. | 33 | * 1 stripe. |
| 34 | */ | 34 | */ |
| 35 | 35 | ||
| 36 | sector_t dev_sectors; /* temp copy of mddev->dev_sectors */ | ||
| 37 | |||
| 36 | int chunk_shift; /* shift from chunks to sectors */ | 38 | int chunk_shift; /* shift from chunks to sectors */ |
| 37 | sector_t chunk_mask; | 39 | sector_t chunk_mask; |
| 38 | 40 | ||
| 41 | int scale_disks; /* When starting array, multiply | ||
| 42 | * each ->raid_disk by this. | ||
| 43 | * Need for raid0->raid10 migration | ||
| 44 | */ | ||
| 45 | |||
| 39 | struct list_head retry_list; | 46 | struct list_head retry_list; |
| 40 | /* queue pending writes and submit them on unplug */ | 47 | /* queue pending writes and submit them on unplug */ |
| 41 | struct bio_list pending_bio_list; | 48 | struct bio_list pending_bio_list; |
| @@ -57,6 +64,11 @@ struct r10_private_data_s { | |||
| 57 | mempool_t *r10bio_pool; | 64 | mempool_t *r10bio_pool; |
| 58 | mempool_t *r10buf_pool; | 65 | mempool_t *r10buf_pool; |
| 59 | struct page *tmppage; | 66 | struct page *tmppage; |
| 67 | |||
| 68 | /* When taking over an array from a different personality, we store | ||
| 69 | * the new thread here until we fully activate the array. | ||
| 70 | */ | ||
| 71 | struct mdk_thread_s *thread; | ||
| 60 | }; | 72 | }; |
| 61 | 73 | ||
| 62 | typedef struct r10_private_data_s conf_t; | 74 | typedef struct r10_private_data_s conf_t; |
