aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.de>2010-06-15 04:36:03 -0400
committerNeilBrown <neilb@suse.de>2010-06-23 23:33:24 -0400
commite93f68a1fc6244c05ad8fae28e75835ec74ab34e (patch)
treed282978aac8f6fcec512be2a6e61287bbb6241b1 /drivers
parent0544a21db02c1d8883158fd6f323364f830a120a (diff)
md: fix handling of array level takeover that re-arranges devices.
Most array level changes leave the list of devices largely unchanged, possibly causing one at the end to become redundant. However, conversions between RAID0 and RAID10 need to renumber all devices (except 0). This renumbering is currently being done in the ->run method when the new personality takes over. However, this is too late, as the common code in md.c might already have invalidated some of the devices if they had a ->raid_disk number that appeared too high. Moving it into the ->takeover method is too early, as the array is still active at that time and wrong ->raid_disk numbers could cause confusion. So add a ->new_raid_disk field to mdk_rdev_s and use it to communicate the new raid_disk number. Now the common code knows exactly which devices need to be renumbered, and which can be invalidated, and can do it all at a convenient time when the array is suspended. It can also update some symlinks in sysfs which previously were not being updated correctly. Reported-by: Maciej Trela <maciej.trela@intel.com> Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/md/md.c35
-rw-r--r--drivers/md/md.h3
-rw-r--r--drivers/md/raid0.c11
-rw-r--r--drivers/md/raid0.h3
-rw-r--r--drivers/md/raid10.c19
-rw-r--r--drivers/md/raid10.h5
6 files changed, 41 insertions, 35 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 4edcda8f4869..4869128bf742 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -3001,6 +3001,9 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
3001 return -EINVAL; 3001 return -EINVAL;
3002 } 3002 }
3003 3003
3004 list_for_each_entry(rdev, &mddev->disks, same_set)
3005 rdev->new_raid_disk = rdev->raid_disk;
3006
3004 /* ->takeover must set new_* and/or delta_disks 3007 /* ->takeover must set new_* and/or delta_disks
3005 * if it succeeds, and may set them when it fails. 3008 * if it succeeds, and may set them when it fails.
3006 */ 3009 */
@@ -3051,13 +3054,35 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
3051 mddev->safemode = 0; 3054 mddev->safemode = 0;
3052 } 3055 }
3053 3056
3054 module_put(mddev->pers->owner); 3057 list_for_each_entry(rdev, &mddev->disks, same_set) {
3055 /* Invalidate devices that are now superfluous */ 3058 char nm[20];
3056 list_for_each_entry(rdev, &mddev->disks, same_set) 3059 if (rdev->raid_disk < 0)
3057 if (rdev->raid_disk >= mddev->raid_disks) { 3060 continue;
3058 rdev->raid_disk = -1; 3061 if (rdev->new_raid_disk > mddev->raid_disks)
3062 rdev->new_raid_disk = -1;
3063 if (rdev->new_raid_disk == rdev->raid_disk)
3064 continue;
3065 sprintf(nm, "rd%d", rdev->raid_disk);
3066 sysfs_remove_link(&mddev->kobj, nm);
3067 }
3068 list_for_each_entry(rdev, &mddev->disks, same_set) {
3069 if (rdev->raid_disk < 0)
3070 continue;
3071 if (rdev->new_raid_disk == rdev->raid_disk)
3072 continue;
3073 rdev->raid_disk = rdev->new_raid_disk;
3074 if (rdev->raid_disk < 0)
3059 clear_bit(In_sync, &rdev->flags); 3075 clear_bit(In_sync, &rdev->flags);
3076 else {
3077 char nm[20];
3078 sprintf(nm, "rd%d", rdev->raid_disk);
3079 if(sysfs_create_link(&mddev->kobj, &rdev->kobj, nm))
3080 printk("md: cannot register %s for %s after level change\n",
3081 nm, mdname(mddev));
3060 } 3082 }
3083 }
3084
3085 module_put(mddev->pers->owner);
3061 mddev->pers = pers; 3086 mddev->pers = pers;
3062 mddev->private = priv; 3087 mddev->private = priv;
3063 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); 3088 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 7ab5ea155452..10597bfec000 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -78,6 +78,9 @@ struct mdk_rdev_s
78 78
79 int desc_nr; /* descriptor index in the superblock */ 79 int desc_nr; /* descriptor index in the superblock */
80 int raid_disk; /* role of device in array */ 80 int raid_disk; /* role of device in array */
81 int new_raid_disk; /* role that the device will have in
82 * the array after a level-change completes.
83 */
81 int saved_raid_disk; /* role that device used to have in the 84 int saved_raid_disk; /* role that device used to have in the
82 * array and could again if we did a partial 85 * array and could again if we did a partial
83 * resync from the bitmap 86 * resync from the bitmap
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index e70f004c99e8..7c7c38058bc2 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -173,9 +173,11 @@ static int create_strip_zones(mddev_t *mddev, raid0_conf_t **private_conf)
173 list_for_each_entry(rdev1, &mddev->disks, same_set) { 173 list_for_each_entry(rdev1, &mddev->disks, same_set) {
174 int j = rdev1->raid_disk; 174 int j = rdev1->raid_disk;
175 175
176 if (mddev->level == 10) 176 if (mddev->level == 10) {
177 /* taking over a raid10-n2 array */ 177 /* taking over a raid10-n2 array */
178 j /= 2; 178 j /= 2;
179 rdev1->new_raid_disk = j;
180 }
179 181
180 if (j < 0 || j >= mddev->raid_disks) { 182 if (j < 0 || j >= mddev->raid_disks) {
181 printk(KERN_ERR "md/raid0:%s: bad disk number %d - " 183 printk(KERN_ERR "md/raid0:%s: bad disk number %d - "
@@ -361,12 +363,6 @@ static int raid0_run(mddev_t *mddev)
361 mddev->private = conf; 363 mddev->private = conf;
362 } 364 }
363 conf = mddev->private; 365 conf = mddev->private;
364 if (conf->scale_raid_disks) {
365 int i;
366 for (i=0; i < conf->strip_zone[0].nb_dev; i++)
367 conf->devlist[i]->raid_disk /= conf->scale_raid_disks;
368 /* FIXME update sysfs rd links */
369 }
370 366
371 /* calculate array device size */ 367 /* calculate array device size */
372 md_set_array_sectors(mddev, raid0_size(mddev, 0, 0)); 368 md_set_array_sectors(mddev, raid0_size(mddev, 0, 0));
@@ -643,7 +639,6 @@ static void *raid0_takeover_raid10(mddev_t *mddev)
643 mddev->recovery_cp = MaxSector; 639 mddev->recovery_cp = MaxSector;
644 640
645 create_strip_zones(mddev, &priv_conf); 641 create_strip_zones(mddev, &priv_conf);
646 priv_conf->scale_raid_disks = 2;
647 return priv_conf; 642 return priv_conf;
648} 643}
649 644
diff --git a/drivers/md/raid0.h b/drivers/md/raid0.h
index d724e664ca4d..91f8e876ee64 100644
--- a/drivers/md/raid0.h
+++ b/drivers/md/raid0.h
@@ -13,9 +13,6 @@ struct raid0_private_data
13 struct strip_zone *strip_zone; 13 struct strip_zone *strip_zone;
14 mdk_rdev_t **devlist; /* lists of rdevs, pointed to by strip_zone->dev */ 14 mdk_rdev_t **devlist; /* lists of rdevs, pointed to by strip_zone->dev */
15 int nr_strip_zones; 15 int nr_strip_zones;
16 int scale_raid_disks; /* divide rdev->raid_disks by this in run()
17 * to handle conversion from raid10
18 */
19}; 16};
20 17
21typedef struct raid0_private_data raid0_conf_t; 18typedef struct raid0_private_data raid0_conf_t;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 6d420cb487b5..1bab3559f3e2 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -2241,7 +2241,6 @@ static conf_t *setup_conf(mddev_t *mddev)
2241 if (!conf->thread) 2241 if (!conf->thread)
2242 goto out; 2242 goto out;
2243 2243
2244 conf->scale_disks = 0;
2245 conf->mddev = mddev; 2244 conf->mddev = mddev;
2246 return conf; 2245 return conf;
2247 2246
@@ -2300,11 +2299,6 @@ static int run(mddev_t *mddev)
2300 if (disk_idx >= conf->raid_disks 2299 if (disk_idx >= conf->raid_disks
2301 || disk_idx < 0) 2300 || disk_idx < 0)
2302 continue; 2301 continue;
2303 if (conf->scale_disks) {
2304 disk_idx *= conf->scale_disks;
2305 rdev->raid_disk = disk_idx;
2306 /* MOVE 'rd%d' link !! */
2307 }
2308 disk = conf->mirrors + disk_idx; 2302 disk = conf->mirrors + disk_idx;
2309 2303
2310 disk->rdev = rdev; 2304 disk->rdev = rdev;
@@ -2435,13 +2429,6 @@ static void *raid10_takeover_raid0(mddev_t *mddev)
2435 return ERR_PTR(-EINVAL); 2429 return ERR_PTR(-EINVAL);
2436 } 2430 }
2437 2431
2438 /* Update slot numbers to obtain
2439 * degraded raid10 with missing mirrors
2440 */
2441 list_for_each_entry(rdev, &mddev->disks, same_set) {
2442 rdev->raid_disk *= 2;
2443 }
2444
2445 /* Set new parameters */ 2432 /* Set new parameters */
2446 mddev->new_level = 10; 2433 mddev->new_level = 10;
2447 /* new layout: far_copies = 1, near_copies = 2 */ 2434 /* new layout: far_copies = 1, near_copies = 2 */
@@ -2454,7 +2441,11 @@ static void *raid10_takeover_raid0(mddev_t *mddev)
2454 mddev->recovery_cp = MaxSector; 2441 mddev->recovery_cp = MaxSector;
2455 2442
2456 conf = setup_conf(mddev); 2443 conf = setup_conf(mddev);
2457 conf->scale_disks = 2; 2444 if (!IS_ERR(conf))
2445 list_for_each_entry(rdev, &mddev->disks, same_set)
2446 if (rdev->raid_disk >= 0)
2447 rdev->new_raid_disk = rdev->raid_disk * 2;
2448
2458 return conf; 2449 return conf;
2459} 2450}
2460 2451
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index 3824a087e17c..2316ac2e8e21 100644
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -38,11 +38,6 @@ struct r10_private_data_s {
38 int chunk_shift; /* shift from chunks to sectors */ 38 int chunk_shift; /* shift from chunks to sectors */
39 sector_t chunk_mask; 39 sector_t chunk_mask;
40 40
41 int scale_disks; /* When starting array, multiply
42 * each ->raid_disk by this.
43 * Need for raid0->raid10 migration
44 */
45
46 struct list_head retry_list; 41 struct list_head retry_list;
47 /* queue pending writes and submit them on unplug */ 42 /* queue pending writes and submit them on unplug */
48 struct bio_list pending_bio_list; 43 struct bio_list pending_bio_list;