Diffstat (limited to 'drivers/md/md.c')
-rw-r--r--  drivers/md/md.c | 235
1 file changed, 199 insertions(+), 36 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 5ed2228745cb..039e071c1007 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -43,6 +43,7 @@
 #include <linux/buffer_head.h> /* for invalidate_bdev */
 #include <linux/suspend.h>
 #include <linux/poll.h>
+#include <linux/mutex.h>
 
 #include <linux/init.h>
 
@@ -158,11 +159,12 @@ static int start_readonly;
  */
 static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
 static atomic_t md_event_count;
-static void md_new_event(mddev_t *mddev)
+void md_new_event(mddev_t *mddev)
 {
 	atomic_inc(&md_event_count);
 	wake_up(&md_event_waiters);
 }
+EXPORT_SYMBOL_GPL(md_new_event);
 
 /*
  * Enables to iterate over all existing md arrays
@@ -253,7 +255,7 @@ static mddev_t * mddev_find(dev_t unit)
253 else 255 else
254 new->md_minor = MINOR(unit) >> MdpMinorShift; 256 new->md_minor = MINOR(unit) >> MdpMinorShift;
255 257
256 init_MUTEX(&new->reconfig_sem); 258 mutex_init(&new->reconfig_mutex);
257 INIT_LIST_HEAD(&new->disks); 259 INIT_LIST_HEAD(&new->disks);
258 INIT_LIST_HEAD(&new->all_mddevs); 260 INIT_LIST_HEAD(&new->all_mddevs);
259 init_timer(&new->safemode_timer); 261 init_timer(&new->safemode_timer);
@@ -266,6 +268,7 @@ static mddev_t * mddev_find(dev_t unit)
 		kfree(new);
 		return NULL;
 	}
+	set_bit(QUEUE_FLAG_CLUSTER, &new->queue->queue_flags);
 
 	blk_queue_make_request(new->queue, md_fail_request);
 
@@ -274,22 +277,22 @@ static mddev_t * mddev_find(dev_t unit)
 
 static inline int mddev_lock(mddev_t * mddev)
 {
-	return down_interruptible(&mddev->reconfig_sem);
+	return mutex_lock_interruptible(&mddev->reconfig_mutex);
 }
 
 static inline void mddev_lock_uninterruptible(mddev_t * mddev)
 {
-	down(&mddev->reconfig_sem);
+	mutex_lock(&mddev->reconfig_mutex);
 }
 
 static inline int mddev_trylock(mddev_t * mddev)
 {
-	return down_trylock(&mddev->reconfig_sem);
+	return mutex_trylock(&mddev->reconfig_mutex);
 }
 
 static inline void mddev_unlock(mddev_t * mddev)
 {
-	up(&mddev->reconfig_sem);
+	mutex_unlock(&mddev->reconfig_mutex);
 
 	md_wakeup_thread(mddev->thread);
 }
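
A note on the trylock conversion: down_trylock() returns 0 on success, while
mutex_trylock() returns 1 on success. That inversion is why the two
mddev_trylock() call sites near the end of this patch change from
"if (mddev_trylock(mddev)==0)" to "if (mddev_trylock(mddev))". A minimal
standalone sketch of the two conventions (era-appropriate headers assumed):

/* Sketch only: contrasts the two trylock conventions this patch swaps. */
#include <linux/mutex.h>
#include <asm/semaphore.h>

static DECLARE_MUTEX(old_sem);		/* a semaphore, despite the name */
static DEFINE_MUTEX(new_mutex);

static void trylock_conventions(void)
{
	if (down_trylock(&old_sem) == 0) {	/* 0 means "acquired" */
		/* ... critical section ... */
		up(&old_sem);
	}
	if (mutex_trylock(&new_mutex)) {	/* nonzero means "acquired" */
		/* ... critical section ... */
		mutex_unlock(&new_mutex);
	}
}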
@@ -660,7 +663,8 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version
 	}
 
 	if (sb->major_version != 0 ||
-	    sb->minor_version != 90) {
+	    sb->minor_version < 90 ||
+	    sb->minor_version > 91) {
 		printk(KERN_WARNING "Bad version number %d.%d on %s\n",
 			sb->major_version, sb->minor_version,
 			b);
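
Accepting minor version 91 here matters because super_90_sync() below stamps
91 into a v0.90 superblock while a reshape is in flight; kernels without this
patch insist on exactly 90, so they refuse to assemble a half-reshaped array,
which is the safe failure mode. A toy predicate mirroring the new gate (the
helper name is hypothetical):

/* Hypothetical helper: a v0.90 superblock is usable iff major == 0 and
 * minor is 90 (normal) or 91 (reshape in progress).
 */
static int sb90_version_ok(const mdp_super_t *sb)
{
	return sb->major_version == 0 &&
	       sb->minor_version >= 90 &&
	       sb->minor_version <= 91;
}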
@@ -745,6 +749,20 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 		mddev->bitmap_offset = 0;
 		mddev->default_bitmap_offset = MD_SB_BYTES >> 9;
 
+		if (mddev->minor_version >= 91) {
+			mddev->reshape_position = sb->reshape_position;
+			mddev->delta_disks = sb->delta_disks;
+			mddev->new_level = sb->new_level;
+			mddev->new_layout = sb->new_layout;
+			mddev->new_chunk = sb->new_chunk;
+		} else {
+			mddev->reshape_position = MaxSector;
+			mddev->delta_disks = 0;
+			mddev->new_level = mddev->level;
+			mddev->new_layout = mddev->layout;
+			mddev->new_chunk = mddev->chunk_size;
+		}
+
 		if (sb->state & (1<<MD_SB_CLEAN))
 			mddev->recovery_cp = MaxSector;
 		else {
@@ -764,7 +782,8 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
764 782
765 if (sb->state & (1<<MD_SB_BITMAP_PRESENT) && 783 if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
766 mddev->bitmap_file == NULL) { 784 mddev->bitmap_file == NULL) {
767 if (mddev->level != 1 && mddev->level != 5 && mddev->level != 6 785 if (mddev->level != 1 && mddev->level != 4
786 && mddev->level != 5 && mddev->level != 6
768 && mddev->level != 10) { 787 && mddev->level != 10) {
769 /* FIXME use a better test */ 788 /* FIXME use a better test */
770 printk(KERN_WARNING "md: bitmaps not supported for this level.\n"); 789 printk(KERN_WARNING "md: bitmaps not supported for this level.\n");
@@ -838,7 +857,6 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 
 	sb->md_magic = MD_SB_MAGIC;
 	sb->major_version = mddev->major_version;
-	sb->minor_version = mddev->minor_version;
 	sb->patch_version = mddev->patch_version;
 	sb->gvalid_words = 0; /* ignored */
 	memcpy(&sb->set_uuid0, mddev->uuid+0, 4);
@@ -857,6 +875,17 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 	sb->events_hi = (mddev->events>>32);
 	sb->events_lo = (u32)mddev->events;
 
+	if (mddev->reshape_position == MaxSector)
+		sb->minor_version = 90;
+	else {
+		sb->minor_version = 91;
+		sb->reshape_position = mddev->reshape_position;
+		sb->new_level = mddev->new_level;
+		sb->delta_disks = mddev->delta_disks;
+		sb->new_layout = mddev->new_layout;
+		sb->new_chunk = mddev->new_chunk;
+	}
+	mddev->minor_version = sb->minor_version;
 	if (mddev->in_sync)
 	{
 		sb->recovery_cp = mddev->recovery_cp;
@@ -893,10 +922,9 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 			d->raid_disk = rdev2->raid_disk;
 		else
 			d->raid_disk = rdev2->desc_nr; /* compatibility */
-		if (test_bit(Faulty, &rdev2->flags)) {
+		if (test_bit(Faulty, &rdev2->flags))
 			d->state = (1<<MD_DISK_FAULTY);
-			failed++;
-		} else if (test_bit(In_sync, &rdev2->flags)) {
+		else if (test_bit(In_sync, &rdev2->flags)) {
 			d->state = (1<<MD_DISK_ACTIVE);
 			d->state |= (1<<MD_DISK_SYNC);
 			active++;
@@ -1102,6 +1130,20 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 			}
 			mddev->bitmap_offset = (__s32)le32_to_cpu(sb->bitmap_offset);
 		}
+		if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
+			mddev->reshape_position = le64_to_cpu(sb->reshape_position);
+			mddev->delta_disks = le32_to_cpu(sb->delta_disks);
+			mddev->new_level = le32_to_cpu(sb->new_level);
+			mddev->new_layout = le32_to_cpu(sb->new_layout);
+			mddev->new_chunk = le32_to_cpu(sb->new_chunk)<<9;
+		} else {
+			mddev->reshape_position = MaxSector;
+			mddev->delta_disks = 0;
+			mddev->new_level = mddev->level;
+			mddev->new_layout = mddev->layout;
+			mddev->new_chunk = mddev->chunk_size;
+		}
+
 	} else if (mddev->pers == NULL) {
 		/* Insist of good event counter while assembling */
 		__u64 ev1 = le64_to_cpu(sb->events);
@@ -1173,6 +1215,14 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 		sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset);
 		sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
 	}
+	if (mddev->reshape_position != MaxSector) {
+		sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
+		sb->reshape_position = cpu_to_le64(mddev->reshape_position);
+		sb->new_layout = cpu_to_le32(mddev->new_layout);
+		sb->delta_disks = cpu_to_le32(mddev->delta_disks);
+		sb->new_level = cpu_to_le32(mddev->new_level);
+		sb->new_chunk = cpu_to_le32(mddev->new_chunk>>9);
+	}
 
 	max_dev = 0;
 	ITERATE_RDEV(mddev,rdev2,tmp)
@@ -1301,6 +1351,7 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
 	else
 		ko = &rdev->bdev->bd_disk->kobj;
 	sysfs_create_link(&rdev->kobj, ko, "block");
+	bd_claim_by_disk(rdev->bdev, rdev, mddev->gendisk);
 	return 0;
 }
 
@@ -1311,6 +1362,7 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev)
 		MD_BUG();
 		return;
 	}
+	bd_release_from_disk(rdev->bdev, rdev->mddev->gendisk);
 	list_del_init(&rdev->same_set);
 	printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b));
 	rdev->mddev = NULL;
@@ -1493,7 +1545,7 @@ static void sync_sbs(mddev_t * mddev)
 	}
 }
 
-static void md_update_sb(mddev_t * mddev)
+void md_update_sb(mddev_t * mddev)
 {
 	int err;
 	struct list_head *tmp;
@@ -1570,6 +1622,7 @@ repeat:
 	wake_up(&mddev->sb_wait);
 
 }
+EXPORT_SYMBOL_GPL(md_update_sb);
 
 /* words written to sysfs files may, or my not, be \n terminated.
  * We want to accept with case. For this we use cmd_match.
@@ -2162,7 +2215,9 @@ action_show(mddev_t *mddev, char *page)
 	char *type = "idle";
 	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
 	    test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) {
-		if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
+		if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
+			type = "reshape";
+		else if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
 			if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
 				type = "resync";
 			else if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
@@ -2193,7 +2248,14 @@ action_store(mddev_t *mddev, const char *page, size_t len)
 		return -EBUSY;
 	else if (cmd_match(page, "resync") || cmd_match(page, "recover"))
 		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
-	else {
+	else if (cmd_match(page, "reshape")) {
+		int err;
+		if (mddev->pers->start_reshape == NULL)
+			return -EINVAL;
+		err = mddev->pers->start_reshape(mddev);
+		if (err)
+			return err;
+	} else {
 		if (cmd_match(page, "check"))
 			set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
 		else if (cmd_match(page, "repair"))
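
action_store() backs the per-array sync_action attribute in sysfs, so the new
branch lets userspace start a reshape whenever the personality provides
start_reshape(). A standalone sketch of that usage (the md0 path is
illustrative):

/* Sketch: trigger a reshape by writing "reshape" to sync_action.
 * action_store() returns -EINVAL if the personality lacks
 * start_reshape(), which userspace sees as a failed write.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/sys/block/md0/md/sync_action";
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror(path);
		return 1;
	}
	if (write(fd, "reshape", strlen("reshape")) < 0)
		perror("write");
	close(fd);
	return 0;
}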
@@ -2304,6 +2366,63 @@ sync_completed_show(mddev_t *mddev, char *page)
 static struct md_sysfs_entry
 md_sync_completed = __ATTR_RO(sync_completed);
 
+static ssize_t
+suspend_lo_show(mddev_t *mddev, char *page)
+{
+	return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
+}
+
+static ssize_t
+suspend_lo_store(mddev_t *mddev, const char *buf, size_t len)
+{
+	char *e;
+	unsigned long long new = simple_strtoull(buf, &e, 10);
+
+	if (mddev->pers->quiesce == NULL)
+		return -EINVAL;
+	if (buf == e || (*e && *e != '\n'))
+		return -EINVAL;
+	if (new >= mddev->suspend_hi ||
+	    (new > mddev->suspend_lo && new < mddev->suspend_hi)) {
+		mddev->suspend_lo = new;
+		mddev->pers->quiesce(mddev, 2);
+		return len;
+	} else
+		return -EINVAL;
+}
+static struct md_sysfs_entry md_suspend_lo =
+__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
+
+
+static ssize_t
+suspend_hi_show(mddev_t *mddev, char *page)
+{
+	return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi);
+}
+
+static ssize_t
+suspend_hi_store(mddev_t *mddev, const char *buf, size_t len)
+{
+	char *e;
+	unsigned long long new = simple_strtoull(buf, &e, 10);
+
+	if (mddev->pers->quiesce == NULL)
+		return -EINVAL;
+	if (buf == e || (*e && *e != '\n'))
+		return -EINVAL;
+	if ((new <= mddev->suspend_lo && mddev->suspend_lo >= mddev->suspend_hi) ||
+	    (new > mddev->suspend_lo && new > mddev->suspend_hi)) {
+		mddev->suspend_hi = new;
+		mddev->pers->quiesce(mddev, 1);
+		mddev->pers->quiesce(mddev, 0);
+		return len;
+	} else
+		return -EINVAL;
+}
+static struct md_sysfs_entry md_suspend_hi =
+__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
+
+
 static struct attribute *md_default_attrs[] = {
 	&md_level.attr,
 	&md_raid_disks.attr,
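
The suspend_lo/suspend_hi pair appears intended to let userspace fence off a
sector range while the personality's quiesce() method drains I/O against it,
for example so a reshape's critical section can be backed up externally. A
sketch of the implied range test (the helper is hypothetical, not part of
this patch):

/* Hypothetical helper: a request is held off while it targets the
 * half-open window [suspend_lo, suspend_hi).
 */
static inline int md_sector_suspended(mddev_t *mddev, sector_t sector)
{
	return sector >= mddev->suspend_lo && sector < mddev->suspend_hi;
}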
@@ -2321,6 +2440,8 @@ static struct attribute *md_redundancy_attrs[] = {
 	&md_sync_max.attr,
 	&md_sync_speed.attr,
 	&md_sync_completed.attr,
+	&md_suspend_lo.attr,
+	&md_suspend_hi.attr,
 	NULL,
 };
 static struct attribute_group md_redundancy_group = {
@@ -2380,7 +2501,7 @@ int mdp_major = 0;
 
 static struct kobject *md_probe(dev_t dev, int *part, void *data)
 {
-	static DECLARE_MUTEX(disks_sem);
+	static DEFINE_MUTEX(disks_mutex);
 	mddev_t *mddev = mddev_find(dev);
 	struct gendisk *disk;
 	int partitioned = (MAJOR(dev) != MD_MAJOR);
@@ -2390,15 +2511,15 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data)
 	if (!mddev)
 		return NULL;
 
-	down(&disks_sem);
+	mutex_lock(&disks_mutex);
 	if (mddev->gendisk) {
-		up(&disks_sem);
+		mutex_unlock(&disks_mutex);
 		mddev_put(mddev);
 		return NULL;
 	}
 	disk = alloc_disk(1 << shift);
 	if (!disk) {
-		up(&disks_sem);
+		mutex_unlock(&disks_mutex);
 		mddev_put(mddev);
 		return NULL;
 	}
@@ -2416,7 +2537,7 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data)
 	disk->queue = mddev->queue;
 	add_disk(disk);
 	mddev->gendisk = disk;
-	up(&disks_sem);
+	mutex_unlock(&disks_mutex);
 	mddev->kobj.parent = &disk->kobj;
 	mddev->kobj.k_name = NULL;
 	snprintf(mddev->kobj.name, KOBJ_NAME_LEN, "%s", "md");
@@ -2539,6 +2660,14 @@ static int do_md_run(mddev_t * mddev)
 	mddev->level = pers->level;
 	strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
 
+	if (mddev->reshape_position != MaxSector &&
+	    pers->start_reshape == NULL) {
+		/* This personality cannot handle reshaping... */
+		mddev->pers = NULL;
+		module_put(pers->owner);
+		return -EINVAL;
+	}
+
 	mddev->recovery = 0;
 	mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */
 	mddev->barriers_work = 1;
@@ -2772,7 +2901,6 @@ static void autorun_array(mddev_t *mddev)
  */
 static void autorun_devices(int part)
 {
-	struct list_head candidates;
 	struct list_head *tmp;
 	mdk_rdev_t *rdev0, *rdev;
 	mddev_t *mddev;
@@ -2781,6 +2909,7 @@ static void autorun_devices(int part)
 	printk(KERN_INFO "md: autorun ...\n");
 	while (!list_empty(&pending_raid_disks)) {
 		dev_t dev;
+		LIST_HEAD(candidates);
 		rdev0 = list_entry(pending_raid_disks.next,
 					 mdk_rdev_t, same_set);
 
@@ -3427,11 +3556,18 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
 	mddev->default_bitmap_offset = MD_SB_BYTES >> 9;
 	mddev->bitmap_offset = 0;
 
+	mddev->reshape_position = MaxSector;
+
 	/*
 	 * Generate a 128 bit UUID
 	 */
 	get_random_bytes(mddev->uuid, 16);
 
+	mddev->new_level = mddev->level;
+	mddev->new_chunk = mddev->chunk_size;
+	mddev->new_layout = mddev->layout;
+	mddev->delta_disks = 0;
+
 	return 0;
 }
 
@@ -3440,6 +3576,7 @@ static int update_size(mddev_t *mddev, unsigned long size)
 	mdk_rdev_t * rdev;
 	int rv;
 	struct list_head *tmp;
+	int fit = (size == 0);
 
 	if (mddev->pers->resize == NULL)
 		return -EINVAL;
@@ -3457,7 +3594,6 @@ static int update_size(mddev_t *mddev, unsigned long size)
 		return -EBUSY;
 	ITERATE_RDEV(mddev,rdev,tmp) {
 		sector_t avail;
-		int fit = (size == 0);
 		if (rdev->sb_offset > rdev->data_offset)
 			avail = (rdev->sb_offset*2) - rdev->data_offset;
 		else
@@ -3487,14 +3623,16 @@ static int update_raid_disks(mddev_t *mddev, int raid_disks)
 {
 	int rv;
 	/* change the number of raid disks */
-	if (mddev->pers->reshape == NULL)
+	if (mddev->pers->check_reshape == NULL)
 		return -EINVAL;
 	if (raid_disks <= 0 ||
 	    raid_disks >= mddev->max_disks)
 		return -EINVAL;
-	if (mddev->sync_thread)
+	if (mddev->sync_thread || mddev->reshape_position != MaxSector)
 		return -EBUSY;
-	rv = mddev->pers->reshape(mddev, raid_disks);
+	mddev->delta_disks = raid_disks - mddev->raid_disks;
+
+	rv = mddev->pers->check_reshape(mddev);
 	return rv;
 }
 
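
update_raid_disks() no longer performs the reshape itself: it records the
requested change in mddev->delta_disks and defers validation to the
personality's check_reshape() hook. A hypothetical minimal hook, only to
illustrate the contract (not the real raid5 implementation):

static int demo_check_reshape(mddev_t *mddev)
{
	/* update_raid_disks() has already set mddev->delta_disks */
	if (mddev->delta_disks == 0)
		return 0;		/* nothing to do */
	if (mddev->delta_disks < 0)
		return -EINVAL;		/* this sketch refuses to shrink */
	/* a real personality would verify spares and memory here,
	 * then arrange for the reshape to be started */
	return 0;
}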
@@ -4041,7 +4179,10 @@ static void status_unused(struct seq_file *seq)
 
 static void status_resync(struct seq_file *seq, mddev_t * mddev)
 {
-	unsigned long max_blocks, resync, res, dt, db, rt;
+	sector_t max_blocks, resync, res;
+	unsigned long dt, db, rt;
+	int scale;
+	unsigned int per_milli;
 
 	resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active))/2;
 
@@ -4057,9 +4198,22 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev)
 		MD_BUG();
 		return;
 	}
-	res = (resync/1024)*1000/(max_blocks/1024 + 1);
+	/* Pick 'scale' such that (resync>>scale)*1000 will fit
+	 * in a sector_t, and (max_blocks>>scale) will fit in a
+	 * u32, as those are the requirements for sector_div.
+	 * Thus 'scale' must be at least 10
+	 */
+	scale = 10;
+	if (sizeof(sector_t) > sizeof(unsigned long)) {
+		while ( max_blocks/2 > (1ULL<<(scale+32)))
+			scale++;
+	}
+	res = (resync>>scale)*1000;
+	sector_div(res, (u32)((max_blocks>>scale)+1));
+
+	per_milli = res;
 	{
-		int i, x = res/50, y = 20-x;
+		int i, x = per_milli/50, y = 20-x;
 		seq_printf(seq, "[");
 		for (i = 0; i < x; i++)
 			seq_printf(seq, "=");
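
The scale gymnastics exist because sector_div() needs a u32 divisor and
(resync>>scale)*1000 must stay within 64 bits; shifting numerator and
denominator by the same amount preserves the ratio. A userspace-runnable
rendition of the same arithmetic (names are illustrative; the kernel only
widens 'scale' when sector_t is wider than unsigned long):

#include <stdint.h>
#include <stdio.h>

static unsigned int per_milli(uint64_t resync, uint64_t max_blocks)
{
	int scale = 10;		/* minimum required, per the comment above */

	while (max_blocks / 2 > (1ULL << (scale + 32)))
		scale++;	/* keep (max_blocks >> scale) within a u32 */

	return (unsigned int)(((resync >> scale) * 1000) /
			      ((max_blocks >> scale) + 1));
}

int main(void)
{
	/* e.g. 1.5TB resynced out of 4TB, counted in 1K blocks */
	uint64_t done  = 1500ULL << 20;
	uint64_t total = 4000ULL << 20;
	unsigned int pm = per_milli(done, total);

	/* prints 37.4% -- the +1 in the divisor rounds down slightly */
	printf("%u.%u%%\n", pm / 10, pm % 10);
	return 0;
}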
@@ -4068,10 +4222,14 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev)
 			seq_printf(seq, ".");
 		seq_printf(seq, "] ");
 	}
-	seq_printf(seq, " %s =%3lu.%lu%% (%lu/%lu)",
+	seq_printf(seq, " %s =%3u.%u%% (%llu/%llu)",
+		   (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)?
+		    "reshape" :
 		   (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
-		    "resync" : "recovery"),
-		   res/10, res % 10, resync, max_blocks);
+		    "resync" : "recovery")),
+		   per_milli/10, per_milli % 10,
+		   (unsigned long long) resync,
+		   (unsigned long long) max_blocks);
 
 	/*
 	 * We do not want to overflow, so the order of operands and
@@ -4085,7 +4243,7 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev)
 	dt = ((jiffies - mddev->resync_mark) / HZ);
 	if (!dt) dt++;
 	db = resync - (mddev->resync_mark_cnt/2);
-	rt = (dt * ((max_blocks-resync) / (db/100+1)))/100;
+	rt = (dt * ((unsigned long)(max_blocks-resync) / (db/100+1)))/100;
 
 	seq_printf(seq, " finish=%lu.%lumin", rt / 60, (rt % 60)/6);
 
@@ -4442,7 +4600,7 @@ static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
 
 #define SYNC_MARKS	10
 #define	SYNC_MARK_STEP	(3*HZ)
-static void md_do_sync(mddev_t *mddev)
+void md_do_sync(mddev_t *mddev)
 {
 	mddev_t *mddev2;
 	unsigned int currspeed = 0,
@@ -4522,7 +4680,9 @@ static void md_do_sync(mddev_t *mddev)
 		 */
 		max_sectors = mddev->resync_max_sectors;
 		mddev->resync_mismatches = 0;
-	} else
+	} else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
+		max_sectors = mddev->size << 1;
+	else
 		/* recovery follows the physical size of devices */
 		max_sectors = mddev->size << 1;
 
@@ -4658,6 +4818,8 @@ static void md_do_sync(mddev_t *mddev)
 	mddev->pers->sync_request(mddev, max_sectors, &skipped, 1);
 
 	if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) &&
+	    test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
+	    !test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
 	    mddev->curr_resync > 2 &&
 	    mddev->curr_resync >= mddev->recovery_cp) {
 		if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
@@ -4675,6 +4837,7 @@ static void md_do_sync(mddev_t *mddev)
 	set_bit(MD_RECOVERY_DONE, &mddev->recovery);
 	md_wakeup_thread(mddev->thread);
 }
+EXPORT_SYMBOL_GPL(md_do_sync);
 
 
 /*
@@ -4730,7 +4893,7 @@ void md_check_recovery(mddev_t *mddev)
 		))
 		return;
 
-	if (mddev_trylock(mddev)==0) {
+	if (mddev_trylock(mddev)) {
 		int spares =0;
 
 		spin_lock_irq(&mddev->write_lock);
@@ -4866,7 +5029,7 @@ static int md_notify_reboot(struct notifier_block *this,
 		printk(KERN_INFO "md: stopping all md devices.\n");
 
 		ITERATE_MDDEV(mddev,tmp)
-			if (mddev_trylock(mddev)==0)
+			if (mddev_trylock(mddev))
 				do_md_stop (mddev, 1);
 		/*
 		 * certain more exotic SCSI devices are known to be