aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Trela, Maciej <Maciej.Trela@intel.com> 2010-03-08 00:02:45 -0500
committer NeilBrown <neilb@suse.de> 2010-05-18 01:27:48 -0400
commit dab8b29248b3f14f456651a2a6ee9b8fd16d1b3c (patch)
tree ced1c7cd74f2e0efc5a48819aa4711047960e2d5
parent 9af204cf720cedf369cf823bbd806c350201f7ea (diff)
md: Add support for Raid0->Raid10 takeover
Signed-off-by: Maciej Trela <maciej.trela@intel.com>
Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r-- drivers/md/raid10.c 194
-rw-r--r-- drivers/md/raid10.h 12
2 files changed, 155 insertions, 51 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 044c1157d98d..57d71d5d88f4 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -23,6 +23,7 @@
23#include <linux/seq_file.h> 23#include <linux/seq_file.h>
24#include "md.h" 24#include "md.h"
25#include "raid10.h" 25#include "raid10.h"
26#include "raid0.h"
26#include "bitmap.h" 27#include "bitmap.h"
27 28
28/* 29/*
@@ -2141,7 +2142,7 @@ raid10_size(mddev_t *mddev, sector_t sectors, int raid_disks)
2141 if (!raid_disks) 2142 if (!raid_disks)
2142 raid_disks = conf->raid_disks; 2143 raid_disks = conf->raid_disks;
2143 if (!sectors) 2144 if (!sectors)
2144 sectors = mddev->dev_sectors; 2145 sectors = conf->dev_sectors;
2145 2146
2146 size = sectors >> conf->chunk_shift; 2147 size = sectors >> conf->chunk_shift;
2147 sector_div(size, conf->far_copies); 2148 sector_div(size, conf->far_copies);
@@ -2151,62 +2152,60 @@ raid10_size(mddev_t *mddev, sector_t sectors, int raid_disks)
2151 return size << conf->chunk_shift; 2152 return size << conf->chunk_shift;
2152} 2153}
2153 2154
2154static int run(mddev_t *mddev) 2155
2156static conf_t *setup_conf(mddev_t *mddev)
2155{ 2157{
2156 conf_t *conf; 2158 conf_t *conf = NULL;
2157 int i, disk_idx, chunk_size;
2158 mirror_info_t *disk;
2159 mdk_rdev_t *rdev;
2160 int nc, fc, fo; 2159 int nc, fc, fo;
2161 sector_t stride, size; 2160 sector_t stride, size;
2161 int err = -EINVAL;
2162 2162
2163 if (mddev->chunk_sectors < (PAGE_SIZE >> 9) || 2163 if (mddev->chunk_sectors < (PAGE_SIZE >> 9) ||
2164 !is_power_of_2(mddev->chunk_sectors)) { 2164 !is_power_of_2(mddev->chunk_sectors)) {
2165 printk(KERN_ERR "md/raid10: chunk size must be " 2165 printk(KERN_ERR "md/raid10: chunk size must be "
2166 "at least PAGE_SIZE(%ld) and be a power of 2.\n", PAGE_SIZE); 2166 "at least PAGE_SIZE(%ld) and be a power of 2.\n", PAGE_SIZE);
2167 return -EINVAL; 2167 goto out;
2168 } 2168 }
2169 2169
2170 nc = mddev->layout & 255; 2170 nc = mddev->layout & 255;
2171 fc = (mddev->layout >> 8) & 255; 2171 fc = (mddev->layout >> 8) & 255;
2172 fo = mddev->layout & (1<<16); 2172 fo = mddev->layout & (1<<16);
2173
2173 if ((nc*fc) <2 || (nc*fc) > mddev->raid_disks || 2174 if ((nc*fc) <2 || (nc*fc) > mddev->raid_disks ||
2174 (mddev->layout >> 17)) { 2175 (mddev->layout >> 17)) {
2175 printk(KERN_ERR "raid10: %s: unsupported raid10 layout: 0x%8x\n", 2176 printk(KERN_ERR "raid10: %s: unsupported raid10 layout: 0x%8x\n",
2176 mdname(mddev), mddev->layout); 2177 mdname(mddev), mddev->layout);
2177 goto out; 2178 goto out;
2178 } 2179 }
2179 /* 2180
2180 * copy the already verified devices into our private RAID10 2181 err = -ENOMEM;
2181 * bookkeeping area. [whatever we allocate in run(),
2182 * should be freed in stop()]
2183 */
2184 conf = kzalloc(sizeof(conf_t), GFP_KERNEL); 2182 conf = kzalloc(sizeof(conf_t), GFP_KERNEL);
2185 mddev->private = conf; 2183 if (!conf)
2186 if (!conf) {
2187 printk(KERN_ERR "raid10: couldn't allocate memory for %s\n",
2188 mdname(mddev));
2189 goto out; 2184 goto out;
2190 } 2185
2191 conf->mirrors = kzalloc(sizeof(struct mirror_info)*mddev->raid_disks, 2186 conf->mirrors = kzalloc(sizeof(struct mirror_info)*mddev->raid_disks,
2192 GFP_KERNEL); 2187 GFP_KERNEL);
2193 if (!conf->mirrors) { 2188 if (!conf->mirrors)
2194 printk(KERN_ERR "raid10: couldn't allocate memory for %s\n", 2189 goto out;
2195 mdname(mddev));
2196 goto out_free_conf;
2197 }
2198 2190
2199 conf->tmppage = alloc_page(GFP_KERNEL); 2191 conf->tmppage = alloc_page(GFP_KERNEL);
2200 if (!conf->tmppage) 2192 if (!conf->tmppage)
2201 goto out_free_conf; 2193 goto out;
2194
2202 2195
2203 conf->raid_disks = mddev->raid_disks; 2196 conf->raid_disks = mddev->raid_disks;
2204 conf->near_copies = nc; 2197 conf->near_copies = nc;
2205 conf->far_copies = fc; 2198 conf->far_copies = fc;
2206 conf->copies = nc*fc; 2199 conf->copies = nc*fc;
2207 conf->far_offset = fo; 2200 conf->far_offset = fo;
2208 conf->chunk_mask = mddev->chunk_sectors - 1; 2201 conf->chunk_mask = mddev->new_chunk_sectors - 1;
2209 conf->chunk_shift = ffz(~mddev->chunk_sectors); 2202 conf->chunk_shift = ffz(~mddev->new_chunk_sectors);
2203
2204 conf->r10bio_pool = mempool_create(NR_RAID10_BIOS, r10bio_pool_alloc,
2205 r10bio_pool_free, conf);
2206 if (!conf->r10bio_pool)
2207 goto out;
2208
2210 size = mddev->dev_sectors >> conf->chunk_shift; 2209 size = mddev->dev_sectors >> conf->chunk_shift;
2211 sector_div(size, fc); 2210 sector_div(size, fc);
2212 size = size * conf->raid_disks; 2211 size = size * conf->raid_disks;
@@ -2220,7 +2219,8 @@ static int run(mddev_t *mddev)
2220 */ 2219 */
2221 stride += conf->raid_disks - 1; 2220 stride += conf->raid_disks - 1;
2222 sector_div(stride, conf->raid_disks); 2221 sector_div(stride, conf->raid_disks);
2223 mddev->dev_sectors = stride << conf->chunk_shift; 2222
2223 conf->dev_sectors = stride << conf->chunk_shift;
2224 2224
2225 if (fo) 2225 if (fo)
2226 stride = 1; 2226 stride = 1;
@@ -2228,18 +2228,63 @@ static int run(mddev_t *mddev)
2228 sector_div(stride, fc); 2228 sector_div(stride, fc);
2229 conf->stride = stride << conf->chunk_shift; 2229 conf->stride = stride << conf->chunk_shift;
2230 2230
2231 conf->r10bio_pool = mempool_create(NR_RAID10_BIOS, r10bio_pool_alloc,
2232 r10bio_pool_free, conf);
2233 if (!conf->r10bio_pool) {
2234 printk(KERN_ERR "raid10: couldn't allocate memory for %s\n",
2235 mdname(mddev));
2236 goto out_free_conf;
2237 }
2238 2231
2239 conf->mddev = mddev;
2240 spin_lock_init(&conf->device_lock); 2232 spin_lock_init(&conf->device_lock);
2233 INIT_LIST_HEAD(&conf->retry_list);
2234
2235 spin_lock_init(&conf->resync_lock);
2236 init_waitqueue_head(&conf->wait_barrier);
2237
2238 conf->thread = md_register_thread(raid10d, mddev, NULL);
2239 if (!conf->thread)
2240 goto out;
2241
2242 conf->scale_disks = 0;
2243 conf->mddev = mddev;
2244 return conf;
2245
2246 out:
2247 printk(KERN_ERR "raid10: couldn't allocate memory for %s\n",
2248 mdname(mddev));
2249 if (conf) {
2250 if (conf->r10bio_pool)
2251 mempool_destroy(conf->r10bio_pool);
2252 kfree(conf->mirrors);
2253 safe_put_page(conf->tmppage);
2254 kfree(conf);
2255 }
2256 return ERR_PTR(err);
2257}
2258
2259static int run(mddev_t *mddev)
2260{
2261 conf_t *conf;
2262 int i, disk_idx, chunk_size;
2263 mirror_info_t *disk;
2264 mdk_rdev_t *rdev;
2265 sector_t size;
2266
2267 /*
2268 * copy the already verified devices into our private RAID10
2269 * bookkeeping area. [whatever we allocate in run(),
2270 * should be freed in stop()]
2271 */
2272
2273 if (mddev->private == NULL) {
2274 conf = setup_conf(mddev);
2275 if (IS_ERR(conf))
2276 return PTR_ERR(conf);
2277 mddev->private = conf;
2278 }
2279 conf = mddev->private;
2280 if (!conf)
2281 goto out;
2282
2241 mddev->queue->queue_lock = &conf->device_lock; 2283 mddev->queue->queue_lock = &conf->device_lock;
2242 2284
2285 mddev->thread = conf->thread;
2286 conf->thread = NULL;
2287
2243 chunk_size = mddev->chunk_sectors << 9; 2288 chunk_size = mddev->chunk_sectors << 9;
2244 blk_queue_io_min(mddev->queue, chunk_size); 2289 blk_queue_io_min(mddev->queue, chunk_size);
2245 if (conf->raid_disks % conf->near_copies) 2290 if (conf->raid_disks % conf->near_copies)
@@ -2253,6 +2298,11 @@ static int run(mddev_t *mddev)
2253 if (disk_idx >= conf->raid_disks 2298 if (disk_idx >= conf->raid_disks
2254 || disk_idx < 0) 2299 || disk_idx < 0)
2255 continue; 2300 continue;
2301 if (conf->scale_disks) {
2302 disk_idx *= conf->scale_disks;
2303 rdev->raid_disk = disk_idx;
2304 /* MOVE 'rd%d' link !! */
2305 }
2256 disk = conf->mirrors + disk_idx; 2306 disk = conf->mirrors + disk_idx;
2257 2307
2258 disk->rdev = rdev; 2308 disk->rdev = rdev;
@@ -2270,11 +2320,6 @@ static int run(mddev_t *mddev)
2270 2320
2271 disk->head_position = 0; 2321 disk->head_position = 0;
2272 } 2322 }
2273 INIT_LIST_HEAD(&conf->retry_list);
2274
2275 spin_lock_init(&conf->resync_lock);
2276 init_waitqueue_head(&conf->wait_barrier);
2277
2278 /* need to check that every block has at least one working mirror */ 2323 /* need to check that every block has at least one working mirror */
2279 if (!enough(conf)) { 2324 if (!enough(conf)) {
2280 printk(KERN_ERR "raid10: not enough operational mirrors for %s\n", 2325 printk(KERN_ERR "raid10: not enough operational mirrors for %s\n",
@@ -2296,15 +2341,6 @@ static int run(mddev_t *mddev)
2296 } 2341 }
2297 } 2342 }
2298 2343
2299
2300 mddev->thread = md_register_thread(raid10d, mddev, NULL);
2301 if (!mddev->thread) {
2302 printk(KERN_ERR
2303 "raid10: couldn't allocate thread for %s\n",
2304 mdname(mddev));
2305 goto out_free_conf;
2306 }
2307
2308 if (mddev->recovery_cp != MaxSector) 2344 if (mddev->recovery_cp != MaxSector)
2309 printk(KERN_NOTICE "raid10: %s is not clean" 2345 printk(KERN_NOTICE "raid10: %s is not clean"
2310 " -- starting background reconstruction\n", 2346 " -- starting background reconstruction\n",
@@ -2316,8 +2352,10 @@ static int run(mddev_t *mddev)
2316 /* 2352 /*
2317 * Ok, everything is just fine now 2353 * Ok, everything is just fine now
2318 */ 2354 */
2319 md_set_array_sectors(mddev, raid10_size(mddev, 0, 0)); 2355 mddev->dev_sectors = conf->dev_sectors;
2320 mddev->resync_max_sectors = raid10_size(mddev, 0, 0); 2356 size = raid10_size(mddev, 0, 0);
2357 md_set_array_sectors(mddev, size);
2358 mddev->resync_max_sectors = size;
2321 2359
2322 mddev->queue->unplug_fn = raid10_unplug; 2360 mddev->queue->unplug_fn = raid10_unplug;
2323 mddev->queue->backing_dev_info.congested_fn = raid10_congested; 2361 mddev->queue->backing_dev_info.congested_fn = raid10_congested;
@@ -2347,6 +2385,7 @@ out_free_conf:
2347 kfree(conf->mirrors); 2385 kfree(conf->mirrors);
2348 kfree(conf); 2386 kfree(conf);
2349 mddev->private = NULL; 2387 mddev->private = NULL;
2388 md_unregister_thread(mddev->thread);
2350out: 2389out:
2351 return -EIO; 2390 return -EIO;
2352} 2391}
@@ -2383,6 +2422,58 @@ static void raid10_quiesce(mddev_t *mddev, int state)
2383 } 2422 }
2384} 2423}
2385 2424
2425static void *raid10_takeover_raid0(mddev_t *mddev)
2426{
2427 mdk_rdev_t *rdev;
2428 conf_t *conf;
2429
2430 if (mddev->degraded > 0) {
2431 printk(KERN_ERR "error: degraded raid0!\n");
2432 return ERR_PTR(-EINVAL);
2433 }
2434
2435 /* Update slot numbers to obtain
2436 * degraded raid10 with missing mirrors
2437 */
2438 list_for_each_entry(rdev, &mddev->disks, same_set) {
2439 rdev->raid_disk *= 2;
2440 }
2441
2442 /* Set new parameters */
2443 mddev->new_level = 10;
2444 /* new layout: far_copies = 1, near_copies = 2 */
2445 mddev->new_layout = (1<<8) + 2;
2446 mddev->new_chunk_sectors = mddev->chunk_sectors;
2447 mddev->delta_disks = mddev->raid_disks;
2448 mddev->degraded = mddev->raid_disks;
2449 mddev->raid_disks *= 2;
2450 /* make sure it will be not marked as dirty */
2451 mddev->recovery_cp = MaxSector;
2452
2453 conf = setup_conf(mddev);
2454 conf->scale_disks = 2;
2455 return conf;
2456}
2457
2458static void *raid10_takeover(mddev_t *mddev)
2459{
2460 struct raid0_private_data *raid0_priv;
2461
2462 /* raid10 can take over:
2463 * raid0 - providing it has only two drives
2464 */
2465 if (mddev->level == 0) {
2466 /* for raid0 takeover only one zone is supported */
2467 raid0_priv = mddev->private;
2468 if (raid0_priv->nr_strip_zones > 1) {
2469 printk(KERN_ERR "md: cannot takeover raid 0 with more than one zone.\n");
2470 return ERR_PTR(-EINVAL);
2471 }
2472 return raid10_takeover_raid0(mddev);
2473 }
2474 return ERR_PTR(-EINVAL);
2475}
2476
2386static struct mdk_personality raid10_personality = 2477static struct mdk_personality raid10_personality =
2387{ 2478{
2388 .name = "raid10", 2479 .name = "raid10",
@@ -2399,6 +2490,7 @@ static struct mdk_personality raid10_personality =
2399 .sync_request = sync_request, 2490 .sync_request = sync_request,
2400 .quiesce = raid10_quiesce, 2491 .quiesce = raid10_quiesce,
2401 .size = raid10_size, 2492 .size = raid10_size,
2493 .takeover = raid10_takeover,
2402}; 2494};
2403 2495
2404static int __init raid_init(void) 2496static int __init raid_init(void)
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index 59cd1efb8d30..3824a087e17c 100644
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -33,9 +33,16 @@ struct r10_private_data_s {
33 * 1 stripe. 33 * 1 stripe.
34 */ 34 */
35 35
36 sector_t dev_sectors; /* temp copy of mddev->dev_sectors */
37
36 int chunk_shift; /* shift from chunks to sectors */ 38 int chunk_shift; /* shift from chunks to sectors */
37 sector_t chunk_mask; 39 sector_t chunk_mask;
38 40
41 int scale_disks; /* When starting array, multiply
42 * each ->raid_disk by this.
43 * Need for raid0->raid10 migration
44 */
45
39 struct list_head retry_list; 46 struct list_head retry_list;
40 /* queue pending writes and submit them on unplug */ 47 /* queue pending writes and submit them on unplug */
41 struct bio_list pending_bio_list; 48 struct bio_list pending_bio_list;
@@ -57,6 +64,11 @@ struct r10_private_data_s {
57 mempool_t *r10bio_pool; 64 mempool_t *r10bio_pool;
58 mempool_t *r10buf_pool; 65 mempool_t *r10buf_pool;
59 struct page *tmppage; 66 struct page *tmppage;
67
68 /* When taking over an array from a different personality, we store
69 * the new thread here until we fully activate the array.
70 */
71 struct mdk_thread_s *thread;
60}; 72};
61 73
62typedef struct r10_private_data_s conf_t; 74typedef struct r10_private_data_s conf_t;