diff options
author | NeilBrown <neilb@suse.de> | 2010-06-15 04:36:03 -0400 |
---|---|---|
committer | NeilBrown <neilb@suse.de> | 2010-06-23 23:33:24 -0400 |
commit | e93f68a1fc6244c05ad8fae28e75835ec74ab34e (patch) | |
tree | d282978aac8f6fcec512be2a6e61287bbb6241b1 /drivers/md | |
parent | 0544a21db02c1d8883158fd6f323364f830a120a (diff) |
md: fix handling of array level takeover that re-arranges devices.
Most array level changes leave the list of devices largely unchanged,
possibly causing one at the end to become redundant.
However conversions between RAID0 and RAID10 need to renumber
all devices (except 0).
This renumbering is currently being done in the ->run method when the
new personality takes over. However this is too late as the common
code in md.c might already have invalidated some of the devices if
they had a ->raid_disk number that appeared too high.
Moving it into the ->takeover method is too early as the array is
still active at that time and wrong ->raid_disk numbers could cause
confusion.
So add a ->new_raid_disk field to mdk_rdev_s and use it to communicate
the new raid_disk number.
Now the common code knows exactly which devices need to be renumbered,
and which can be invalidated, and can do it all at a convenient time
when the array is suspended.
It can also update some symlinks in sysfs which previously were not
updated correctly.
Reported-by: Maciej Trela <maciej.trela@intel.com>
Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/md.c | 35 | ||||
-rw-r--r-- | drivers/md/md.h | 3 | ||||
-rw-r--r-- | drivers/md/raid0.c | 11 | ||||
-rw-r--r-- | drivers/md/raid0.h | 3 | ||||
-rw-r--r-- | drivers/md/raid10.c | 19 | ||||
-rw-r--r-- | drivers/md/raid10.h | 5 |
6 files changed, 41 insertions, 35 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index 4edcda8f486..4869128bf74 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -3001,6 +3001,9 @@ level_store(mddev_t *mddev, const char *buf, size_t len) | |||
3001 | return -EINVAL; | 3001 | return -EINVAL; |
3002 | } | 3002 | } |
3003 | 3003 | ||
3004 | list_for_each_entry(rdev, &mddev->disks, same_set) | ||
3005 | rdev->new_raid_disk = rdev->raid_disk; | ||
3006 | |||
3004 | /* ->takeover must set new_* and/or delta_disks | 3007 | /* ->takeover must set new_* and/or delta_disks |
3005 | * if it succeeds, and may set them when it fails. | 3008 | * if it succeeds, and may set them when it fails. |
3006 | */ | 3009 | */ |
@@ -3051,13 +3054,35 @@ level_store(mddev_t *mddev, const char *buf, size_t len) | |||
3051 | mddev->safemode = 0; | 3054 | mddev->safemode = 0; |
3052 | } | 3055 | } |
3053 | 3056 | ||
3054 | module_put(mddev->pers->owner); | 3057 | list_for_each_entry(rdev, &mddev->disks, same_set) { |
3055 | /* Invalidate devices that are now superfluous */ | 3058 | char nm[20]; |
3056 | list_for_each_entry(rdev, &mddev->disks, same_set) | 3059 | if (rdev->raid_disk < 0) |
3057 | if (rdev->raid_disk >= mddev->raid_disks) { | 3060 | continue; |
3058 | rdev->raid_disk = -1; | 3061 | if (rdev->new_raid_disk > mddev->raid_disks) |
3062 | rdev->new_raid_disk = -1; | ||
3063 | if (rdev->new_raid_disk == rdev->raid_disk) | ||
3064 | continue; | ||
3065 | sprintf(nm, "rd%d", rdev->raid_disk); | ||
3066 | sysfs_remove_link(&mddev->kobj, nm); | ||
3067 | } | ||
3068 | list_for_each_entry(rdev, &mddev->disks, same_set) { | ||
3069 | if (rdev->raid_disk < 0) | ||
3070 | continue; | ||
3071 | if (rdev->new_raid_disk == rdev->raid_disk) | ||
3072 | continue; | ||
3073 | rdev->raid_disk = rdev->new_raid_disk; | ||
3074 | if (rdev->raid_disk < 0) | ||
3059 | clear_bit(In_sync, &rdev->flags); | 3075 | clear_bit(In_sync, &rdev->flags); |
3076 | else { | ||
3077 | char nm[20]; | ||
3078 | sprintf(nm, "rd%d", rdev->raid_disk); | ||
3079 | if(sysfs_create_link(&mddev->kobj, &rdev->kobj, nm)) | ||
3080 | printk("md: cannot register %s for %s after level change\n", | ||
3081 | nm, mdname(mddev)); | ||
3060 | } | 3082 | } |
3083 | } | ||
3084 | |||
3085 | module_put(mddev->pers->owner); | ||
3061 | mddev->pers = pers; | 3086 | mddev->pers = pers; |
3062 | mddev->private = priv; | 3087 | mddev->private = priv; |
3063 | strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); | 3088 | strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); |
diff --git a/drivers/md/md.h b/drivers/md/md.h index 7ab5ea15545..10597bfec00 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h | |||
@@ -78,6 +78,9 @@ struct mdk_rdev_s | |||
78 | 78 | ||
79 | int desc_nr; /* descriptor index in the superblock */ | 79 | int desc_nr; /* descriptor index in the superblock */ |
80 | int raid_disk; /* role of device in array */ | 80 | int raid_disk; /* role of device in array */ |
81 | int new_raid_disk; /* role that the device will have in | ||
82 | * the array after a level-change completes. | ||
83 | */ | ||
81 | int saved_raid_disk; /* role that device used to have in the | 84 | int saved_raid_disk; /* role that device used to have in the |
82 | * array and could again if we did a partial | 85 | * array and could again if we did a partial |
83 | * resync from the bitmap | 86 | * resync from the bitmap |
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index e70f004c99e..7c7c38058bc 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c | |||
@@ -173,9 +173,11 @@ static int create_strip_zones(mddev_t *mddev, raid0_conf_t **private_conf) | |||
173 | list_for_each_entry(rdev1, &mddev->disks, same_set) { | 173 | list_for_each_entry(rdev1, &mddev->disks, same_set) { |
174 | int j = rdev1->raid_disk; | 174 | int j = rdev1->raid_disk; |
175 | 175 | ||
176 | if (mddev->level == 10) | 176 | if (mddev->level == 10) { |
177 | /* taking over a raid10-n2 array */ | 177 | /* taking over a raid10-n2 array */ |
178 | j /= 2; | 178 | j /= 2; |
179 | rdev1->new_raid_disk = j; | ||
180 | } | ||
179 | 181 | ||
180 | if (j < 0 || j >= mddev->raid_disks) { | 182 | if (j < 0 || j >= mddev->raid_disks) { |
181 | printk(KERN_ERR "md/raid0:%s: bad disk number %d - " | 183 | printk(KERN_ERR "md/raid0:%s: bad disk number %d - " |
@@ -361,12 +363,6 @@ static int raid0_run(mddev_t *mddev) | |||
361 | mddev->private = conf; | 363 | mddev->private = conf; |
362 | } | 364 | } |
363 | conf = mddev->private; | 365 | conf = mddev->private; |
364 | if (conf->scale_raid_disks) { | ||
365 | int i; | ||
366 | for (i=0; i < conf->strip_zone[0].nb_dev; i++) | ||
367 | conf->devlist[i]->raid_disk /= conf->scale_raid_disks; | ||
368 | /* FIXME update sysfs rd links */ | ||
369 | } | ||
370 | 366 | ||
371 | /* calculate array device size */ | 367 | /* calculate array device size */ |
372 | md_set_array_sectors(mddev, raid0_size(mddev, 0, 0)); | 368 | md_set_array_sectors(mddev, raid0_size(mddev, 0, 0)); |
@@ -643,7 +639,6 @@ static void *raid0_takeover_raid10(mddev_t *mddev) | |||
643 | mddev->recovery_cp = MaxSector; | 639 | mddev->recovery_cp = MaxSector; |
644 | 640 | ||
645 | create_strip_zones(mddev, &priv_conf); | 641 | create_strip_zones(mddev, &priv_conf); |
646 | priv_conf->scale_raid_disks = 2; | ||
647 | return priv_conf; | 642 | return priv_conf; |
648 | } | 643 | } |
649 | 644 | ||
diff --git a/drivers/md/raid0.h b/drivers/md/raid0.h index d724e664ca4..91f8e876ee6 100644 --- a/drivers/md/raid0.h +++ b/drivers/md/raid0.h | |||
@@ -13,9 +13,6 @@ struct raid0_private_data | |||
13 | struct strip_zone *strip_zone; | 13 | struct strip_zone *strip_zone; |
14 | mdk_rdev_t **devlist; /* lists of rdevs, pointed to by strip_zone->dev */ | 14 | mdk_rdev_t **devlist; /* lists of rdevs, pointed to by strip_zone->dev */ |
15 | int nr_strip_zones; | 15 | int nr_strip_zones; |
16 | int scale_raid_disks; /* divide rdev->raid_disks by this in run() | ||
17 | * to handle conversion from raid10 | ||
18 | */ | ||
19 | }; | 16 | }; |
20 | 17 | ||
21 | typedef struct raid0_private_data raid0_conf_t; | 18 | typedef struct raid0_private_data raid0_conf_t; |
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 6d420cb487b..1bab3559f3e 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -2241,7 +2241,6 @@ static conf_t *setup_conf(mddev_t *mddev) | |||
2241 | if (!conf->thread) | 2241 | if (!conf->thread) |
2242 | goto out; | 2242 | goto out; |
2243 | 2243 | ||
2244 | conf->scale_disks = 0; | ||
2245 | conf->mddev = mddev; | 2244 | conf->mddev = mddev; |
2246 | return conf; | 2245 | return conf; |
2247 | 2246 | ||
@@ -2300,11 +2299,6 @@ static int run(mddev_t *mddev) | |||
2300 | if (disk_idx >= conf->raid_disks | 2299 | if (disk_idx >= conf->raid_disks |
2301 | || disk_idx < 0) | 2300 | || disk_idx < 0) |
2302 | continue; | 2301 | continue; |
2303 | if (conf->scale_disks) { | ||
2304 | disk_idx *= conf->scale_disks; | ||
2305 | rdev->raid_disk = disk_idx; | ||
2306 | /* MOVE 'rd%d' link !! */ | ||
2307 | } | ||
2308 | disk = conf->mirrors + disk_idx; | 2302 | disk = conf->mirrors + disk_idx; |
2309 | 2303 | ||
2310 | disk->rdev = rdev; | 2304 | disk->rdev = rdev; |
@@ -2435,13 +2429,6 @@ static void *raid10_takeover_raid0(mddev_t *mddev) | |||
2435 | return ERR_PTR(-EINVAL); | 2429 | return ERR_PTR(-EINVAL); |
2436 | } | 2430 | } |
2437 | 2431 | ||
2438 | /* Update slot numbers to obtain | ||
2439 | * degraded raid10 with missing mirrors | ||
2440 | */ | ||
2441 | list_for_each_entry(rdev, &mddev->disks, same_set) { | ||
2442 | rdev->raid_disk *= 2; | ||
2443 | } | ||
2444 | |||
2445 | /* Set new parameters */ | 2432 | /* Set new parameters */ |
2446 | mddev->new_level = 10; | 2433 | mddev->new_level = 10; |
2447 | /* new layout: far_copies = 1, near_copies = 2 */ | 2434 | /* new layout: far_copies = 1, near_copies = 2 */ |
@@ -2454,7 +2441,11 @@ static void *raid10_takeover_raid0(mddev_t *mddev) | |||
2454 | mddev->recovery_cp = MaxSector; | 2441 | mddev->recovery_cp = MaxSector; |
2455 | 2442 | ||
2456 | conf = setup_conf(mddev); | 2443 | conf = setup_conf(mddev); |
2457 | conf->scale_disks = 2; | 2444 | if (!IS_ERR(conf)) |
2445 | list_for_each_entry(rdev, &mddev->disks, same_set) | ||
2446 | if (rdev->raid_disk >= 0) | ||
2447 | rdev->new_raid_disk = rdev->raid_disk * 2; | ||
2448 | |||
2458 | return conf; | 2449 | return conf; |
2459 | } | 2450 | } |
2460 | 2451 | ||
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h index 3824a087e17..2316ac2e8e2 100644 --- a/drivers/md/raid10.h +++ b/drivers/md/raid10.h | |||
@@ -38,11 +38,6 @@ struct r10_private_data_s { | |||
38 | int chunk_shift; /* shift from chunks to sectors */ | 38 | int chunk_shift; /* shift from chunks to sectors */ |
39 | sector_t chunk_mask; | 39 | sector_t chunk_mask; |
40 | 40 | ||
41 | int scale_disks; /* When starting array, multiply | ||
42 | * each ->raid_disk by this. | ||
43 | * Need for raid0->raid10 migration | ||
44 | */ | ||
45 | |||
46 | struct list_head retry_list; | 41 | struct list_head retry_list; |
47 | /* queue pending writes and submit them on unplug */ | 42 | /* queue pending writes and submit them on unplug */ |
48 | struct bio_list pending_bio_list; | 43 | struct bio_list pending_bio_list; |