diff options
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r-- | drivers/md/md.c | 235 |
1 files changed, 199 insertions, 36 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index 5ed2228745cb..039e071c1007 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -43,6 +43,7 @@ | |||
43 | #include <linux/buffer_head.h> /* for invalidate_bdev */ | 43 | #include <linux/buffer_head.h> /* for invalidate_bdev */ |
44 | #include <linux/suspend.h> | 44 | #include <linux/suspend.h> |
45 | #include <linux/poll.h> | 45 | #include <linux/poll.h> |
46 | #include <linux/mutex.h> | ||
46 | 47 | ||
47 | #include <linux/init.h> | 48 | #include <linux/init.h> |
48 | 49 | ||
@@ -158,11 +159,12 @@ static int start_readonly; | |||
158 | */ | 159 | */ |
159 | static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters); | 160 | static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters); |
160 | static atomic_t md_event_count; | 161 | static atomic_t md_event_count; |
161 | static void md_new_event(mddev_t *mddev) | 162 | void md_new_event(mddev_t *mddev) |
162 | { | 163 | { |
163 | atomic_inc(&md_event_count); | 164 | atomic_inc(&md_event_count); |
164 | wake_up(&md_event_waiters); | 165 | wake_up(&md_event_waiters); |
165 | } | 166 | } |
167 | EXPORT_SYMBOL_GPL(md_new_event); | ||
166 | 168 | ||
167 | /* | 169 | /* |
168 | * Enables to iterate over all existing md arrays | 170 | * Enables to iterate over all existing md arrays |
@@ -253,7 +255,7 @@ static mddev_t * mddev_find(dev_t unit) | |||
253 | else | 255 | else |
254 | new->md_minor = MINOR(unit) >> MdpMinorShift; | 256 | new->md_minor = MINOR(unit) >> MdpMinorShift; |
255 | 257 | ||
256 | init_MUTEX(&new->reconfig_sem); | 258 | mutex_init(&new->reconfig_mutex); |
257 | INIT_LIST_HEAD(&new->disks); | 259 | INIT_LIST_HEAD(&new->disks); |
258 | INIT_LIST_HEAD(&new->all_mddevs); | 260 | INIT_LIST_HEAD(&new->all_mddevs); |
259 | init_timer(&new->safemode_timer); | 261 | init_timer(&new->safemode_timer); |
@@ -266,6 +268,7 @@ static mddev_t * mddev_find(dev_t unit) | |||
266 | kfree(new); | 268 | kfree(new); |
267 | return NULL; | 269 | return NULL; |
268 | } | 270 | } |
271 | set_bit(QUEUE_FLAG_CLUSTER, &new->queue->queue_flags); | ||
269 | 272 | ||
270 | blk_queue_make_request(new->queue, md_fail_request); | 273 | blk_queue_make_request(new->queue, md_fail_request); |
271 | 274 | ||
@@ -274,22 +277,22 @@ static mddev_t * mddev_find(dev_t unit) | |||
274 | 277 | ||
275 | static inline int mddev_lock(mddev_t * mddev) | 278 | static inline int mddev_lock(mddev_t * mddev) |
276 | { | 279 | { |
277 | return down_interruptible(&mddev->reconfig_sem); | 280 | return mutex_lock_interruptible(&mddev->reconfig_mutex); |
278 | } | 281 | } |
279 | 282 | ||
280 | static inline void mddev_lock_uninterruptible(mddev_t * mddev) | 283 | static inline void mddev_lock_uninterruptible(mddev_t * mddev) |
281 | { | 284 | { |
282 | down(&mddev->reconfig_sem); | 285 | mutex_lock(&mddev->reconfig_mutex); |
283 | } | 286 | } |
284 | 287 | ||
285 | static inline int mddev_trylock(mddev_t * mddev) | 288 | static inline int mddev_trylock(mddev_t * mddev) |
286 | { | 289 | { |
287 | return down_trylock(&mddev->reconfig_sem); | 290 | return mutex_trylock(&mddev->reconfig_mutex); |
288 | } | 291 | } |
289 | 292 | ||
290 | static inline void mddev_unlock(mddev_t * mddev) | 293 | static inline void mddev_unlock(mddev_t * mddev) |
291 | { | 294 | { |
292 | up(&mddev->reconfig_sem); | 295 | mutex_unlock(&mddev->reconfig_mutex); |
293 | 296 | ||
294 | md_wakeup_thread(mddev->thread); | 297 | md_wakeup_thread(mddev->thread); |
295 | } | 298 | } |
@@ -660,7 +663,8 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version | |||
660 | } | 663 | } |
661 | 664 | ||
662 | if (sb->major_version != 0 || | 665 | if (sb->major_version != 0 || |
663 | sb->minor_version != 90) { | 666 | sb->minor_version < 90 || |
667 | sb->minor_version > 91) { | ||
664 | printk(KERN_WARNING "Bad version number %d.%d on %s\n", | 668 | printk(KERN_WARNING "Bad version number %d.%d on %s\n", |
665 | sb->major_version, sb->minor_version, | 669 | sb->major_version, sb->minor_version, |
666 | b); | 670 | b); |
@@ -745,6 +749,20 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
745 | mddev->bitmap_offset = 0; | 749 | mddev->bitmap_offset = 0; |
746 | mddev->default_bitmap_offset = MD_SB_BYTES >> 9; | 750 | mddev->default_bitmap_offset = MD_SB_BYTES >> 9; |
747 | 751 | ||
752 | if (mddev->minor_version >= 91) { | ||
753 | mddev->reshape_position = sb->reshape_position; | ||
754 | mddev->delta_disks = sb->delta_disks; | ||
755 | mddev->new_level = sb->new_level; | ||
756 | mddev->new_layout = sb->new_layout; | ||
757 | mddev->new_chunk = sb->new_chunk; | ||
758 | } else { | ||
759 | mddev->reshape_position = MaxSector; | ||
760 | mddev->delta_disks = 0; | ||
761 | mddev->new_level = mddev->level; | ||
762 | mddev->new_layout = mddev->layout; | ||
763 | mddev->new_chunk = mddev->chunk_size; | ||
764 | } | ||
765 | |||
748 | if (sb->state & (1<<MD_SB_CLEAN)) | 766 | if (sb->state & (1<<MD_SB_CLEAN)) |
749 | mddev->recovery_cp = MaxSector; | 767 | mddev->recovery_cp = MaxSector; |
750 | else { | 768 | else { |
@@ -764,7 +782,8 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
764 | 782 | ||
765 | if (sb->state & (1<<MD_SB_BITMAP_PRESENT) && | 783 | if (sb->state & (1<<MD_SB_BITMAP_PRESENT) && |
766 | mddev->bitmap_file == NULL) { | 784 | mddev->bitmap_file == NULL) { |
767 | if (mddev->level != 1 && mddev->level != 5 && mddev->level != 6 | 785 | if (mddev->level != 1 && mddev->level != 4 |
786 | && mddev->level != 5 && mddev->level != 6 | ||
768 | && mddev->level != 10) { | 787 | && mddev->level != 10) { |
769 | /* FIXME use a better test */ | 788 | /* FIXME use a better test */ |
770 | printk(KERN_WARNING "md: bitmaps not supported for this level.\n"); | 789 | printk(KERN_WARNING "md: bitmaps not supported for this level.\n"); |
@@ -838,7 +857,6 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
838 | 857 | ||
839 | sb->md_magic = MD_SB_MAGIC; | 858 | sb->md_magic = MD_SB_MAGIC; |
840 | sb->major_version = mddev->major_version; | 859 | sb->major_version = mddev->major_version; |
841 | sb->minor_version = mddev->minor_version; | ||
842 | sb->patch_version = mddev->patch_version; | 860 | sb->patch_version = mddev->patch_version; |
843 | sb->gvalid_words = 0; /* ignored */ | 861 | sb->gvalid_words = 0; /* ignored */ |
844 | memcpy(&sb->set_uuid0, mddev->uuid+0, 4); | 862 | memcpy(&sb->set_uuid0, mddev->uuid+0, 4); |
@@ -857,6 +875,17 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
857 | sb->events_hi = (mddev->events>>32); | 875 | sb->events_hi = (mddev->events>>32); |
858 | sb->events_lo = (u32)mddev->events; | 876 | sb->events_lo = (u32)mddev->events; |
859 | 877 | ||
878 | if (mddev->reshape_position == MaxSector) | ||
879 | sb->minor_version = 90; | ||
880 | else { | ||
881 | sb->minor_version = 91; | ||
882 | sb->reshape_position = mddev->reshape_position; | ||
883 | sb->new_level = mddev->new_level; | ||
884 | sb->delta_disks = mddev->delta_disks; | ||
885 | sb->new_layout = mddev->new_layout; | ||
886 | sb->new_chunk = mddev->new_chunk; | ||
887 | } | ||
888 | mddev->minor_version = sb->minor_version; | ||
860 | if (mddev->in_sync) | 889 | if (mddev->in_sync) |
861 | { | 890 | { |
862 | sb->recovery_cp = mddev->recovery_cp; | 891 | sb->recovery_cp = mddev->recovery_cp; |
@@ -893,10 +922,9 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
893 | d->raid_disk = rdev2->raid_disk; | 922 | d->raid_disk = rdev2->raid_disk; |
894 | else | 923 | else |
895 | d->raid_disk = rdev2->desc_nr; /* compatibility */ | 924 | d->raid_disk = rdev2->desc_nr; /* compatibility */ |
896 | if (test_bit(Faulty, &rdev2->flags)) { | 925 | if (test_bit(Faulty, &rdev2->flags)) |
897 | d->state = (1<<MD_DISK_FAULTY); | 926 | d->state = (1<<MD_DISK_FAULTY); |
898 | failed++; | 927 | else if (test_bit(In_sync, &rdev2->flags)) { |
899 | } else if (test_bit(In_sync, &rdev2->flags)) { | ||
900 | d->state = (1<<MD_DISK_ACTIVE); | 928 | d->state = (1<<MD_DISK_ACTIVE); |
901 | d->state |= (1<<MD_DISK_SYNC); | 929 | d->state |= (1<<MD_DISK_SYNC); |
902 | active++; | 930 | active++; |
@@ -1102,6 +1130,20 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1102 | } | 1130 | } |
1103 | mddev->bitmap_offset = (__s32)le32_to_cpu(sb->bitmap_offset); | 1131 | mddev->bitmap_offset = (__s32)le32_to_cpu(sb->bitmap_offset); |
1104 | } | 1132 | } |
1133 | if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) { | ||
1134 | mddev->reshape_position = le64_to_cpu(sb->reshape_position); | ||
1135 | mddev->delta_disks = le32_to_cpu(sb->delta_disks); | ||
1136 | mddev->new_level = le32_to_cpu(sb->new_level); | ||
1137 | mddev->new_layout = le32_to_cpu(sb->new_layout); | ||
1138 | mddev->new_chunk = le32_to_cpu(sb->new_chunk)<<9; | ||
1139 | } else { | ||
1140 | mddev->reshape_position = MaxSector; | ||
1141 | mddev->delta_disks = 0; | ||
1142 | mddev->new_level = mddev->level; | ||
1143 | mddev->new_layout = mddev->layout; | ||
1144 | mddev->new_chunk = mddev->chunk_size; | ||
1145 | } | ||
1146 | |||
1105 | } else if (mddev->pers == NULL) { | 1147 | } else if (mddev->pers == NULL) { |
1106 | /* Insist of good event counter while assembling */ | 1148 | /* Insist of good event counter while assembling */ |
1107 | __u64 ev1 = le64_to_cpu(sb->events); | 1149 | __u64 ev1 = le64_to_cpu(sb->events); |
@@ -1173,6 +1215,14 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1173 | sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset); | 1215 | sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset); |
1174 | sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET); | 1216 | sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET); |
1175 | } | 1217 | } |
1218 | if (mddev->reshape_position != MaxSector) { | ||
1219 | sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE); | ||
1220 | sb->reshape_position = cpu_to_le64(mddev->reshape_position); | ||
1221 | sb->new_layout = cpu_to_le32(mddev->new_layout); | ||
1222 | sb->delta_disks = cpu_to_le32(mddev->delta_disks); | ||
1223 | sb->new_level = cpu_to_le32(mddev->new_level); | ||
1224 | sb->new_chunk = cpu_to_le32(mddev->new_chunk>>9); | ||
1225 | } | ||
1176 | 1226 | ||
1177 | max_dev = 0; | 1227 | max_dev = 0; |
1178 | ITERATE_RDEV(mddev,rdev2,tmp) | 1228 | ITERATE_RDEV(mddev,rdev2,tmp) |
@@ -1301,6 +1351,7 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) | |||
1301 | else | 1351 | else |
1302 | ko = &rdev->bdev->bd_disk->kobj; | 1352 | ko = &rdev->bdev->bd_disk->kobj; |
1303 | sysfs_create_link(&rdev->kobj, ko, "block"); | 1353 | sysfs_create_link(&rdev->kobj, ko, "block"); |
1354 | bd_claim_by_disk(rdev->bdev, rdev, mddev->gendisk); | ||
1304 | return 0; | 1355 | return 0; |
1305 | } | 1356 | } |
1306 | 1357 | ||
@@ -1311,6 +1362,7 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev) | |||
1311 | MD_BUG(); | 1362 | MD_BUG(); |
1312 | return; | 1363 | return; |
1313 | } | 1364 | } |
1365 | bd_release_from_disk(rdev->bdev, rdev->mddev->gendisk); | ||
1314 | list_del_init(&rdev->same_set); | 1366 | list_del_init(&rdev->same_set); |
1315 | printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b)); | 1367 | printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b)); |
1316 | rdev->mddev = NULL; | 1368 | rdev->mddev = NULL; |
@@ -1493,7 +1545,7 @@ static void sync_sbs(mddev_t * mddev) | |||
1493 | } | 1545 | } |
1494 | } | 1546 | } |
1495 | 1547 | ||
1496 | static void md_update_sb(mddev_t * mddev) | 1548 | void md_update_sb(mddev_t * mddev) |
1497 | { | 1549 | { |
1498 | int err; | 1550 | int err; |
1499 | struct list_head *tmp; | 1551 | struct list_head *tmp; |
@@ -1570,6 +1622,7 @@ repeat: | |||
1570 | wake_up(&mddev->sb_wait); | 1622 | wake_up(&mddev->sb_wait); |
1571 | 1623 | ||
1572 | } | 1624 | } |
1625 | EXPORT_SYMBOL_GPL(md_update_sb); | ||
1573 | 1626 | ||
1574 | /* words written to sysfs files may, or my not, be \n terminated. | 1627 | /* words written to sysfs files may, or my not, be \n terminated. |
1575 | * We want to accept with case. For this we use cmd_match. | 1628 | * We want to accept with case. For this we use cmd_match. |
@@ -2162,7 +2215,9 @@ action_show(mddev_t *mddev, char *page) | |||
2162 | char *type = "idle"; | 2215 | char *type = "idle"; |
2163 | if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || | 2216 | if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || |
2164 | test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) { | 2217 | test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) { |
2165 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { | 2218 | if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) |
2219 | type = "reshape"; | ||
2220 | else if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { | ||
2166 | if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) | 2221 | if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) |
2167 | type = "resync"; | 2222 | type = "resync"; |
2168 | else if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) | 2223 | else if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) |
@@ -2193,7 +2248,14 @@ action_store(mddev_t *mddev, const char *page, size_t len) | |||
2193 | return -EBUSY; | 2248 | return -EBUSY; |
2194 | else if (cmd_match(page, "resync") || cmd_match(page, "recover")) | 2249 | else if (cmd_match(page, "resync") || cmd_match(page, "recover")) |
2195 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 2250 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
2196 | else { | 2251 | else if (cmd_match(page, "reshape")) { |
2252 | int err; | ||
2253 | if (mddev->pers->start_reshape == NULL) | ||
2254 | return -EINVAL; | ||
2255 | err = mddev->pers->start_reshape(mddev); | ||
2256 | if (err) | ||
2257 | return err; | ||
2258 | } else { | ||
2197 | if (cmd_match(page, "check")) | 2259 | if (cmd_match(page, "check")) |
2198 | set_bit(MD_RECOVERY_CHECK, &mddev->recovery); | 2260 | set_bit(MD_RECOVERY_CHECK, &mddev->recovery); |
2199 | else if (cmd_match(page, "repair")) | 2261 | else if (cmd_match(page, "repair")) |
@@ -2304,6 +2366,63 @@ sync_completed_show(mddev_t *mddev, char *page) | |||
2304 | static struct md_sysfs_entry | 2366 | static struct md_sysfs_entry |
2305 | md_sync_completed = __ATTR_RO(sync_completed); | 2367 | md_sync_completed = __ATTR_RO(sync_completed); |
2306 | 2368 | ||
2369 | static ssize_t | ||
2370 | suspend_lo_show(mddev_t *mddev, char *page) | ||
2371 | { | ||
2372 | return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo); | ||
2373 | } | ||
2374 | |||
2375 | static ssize_t | ||
2376 | suspend_lo_store(mddev_t *mddev, const char *buf, size_t len) | ||
2377 | { | ||
2378 | char *e; | ||
2379 | unsigned long long new = simple_strtoull(buf, &e, 10); | ||
2380 | |||
2381 | if (mddev->pers->quiesce == NULL) | ||
2382 | return -EINVAL; | ||
2383 | if (buf == e || (*e && *e != '\n')) | ||
2384 | return -EINVAL; | ||
2385 | if (new >= mddev->suspend_hi || | ||
2386 | (new > mddev->suspend_lo && new < mddev->suspend_hi)) { | ||
2387 | mddev->suspend_lo = new; | ||
2388 | mddev->pers->quiesce(mddev, 2); | ||
2389 | return len; | ||
2390 | } else | ||
2391 | return -EINVAL; | ||
2392 | } | ||
2393 | static struct md_sysfs_entry md_suspend_lo = | ||
2394 | __ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store); | ||
2395 | |||
2396 | |||
2397 | static ssize_t | ||
2398 | suspend_hi_show(mddev_t *mddev, char *page) | ||
2399 | { | ||
2400 | return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi); | ||
2401 | } | ||
2402 | |||
2403 | static ssize_t | ||
2404 | suspend_hi_store(mddev_t *mddev, const char *buf, size_t len) | ||
2405 | { | ||
2406 | char *e; | ||
2407 | unsigned long long new = simple_strtoull(buf, &e, 10); | ||
2408 | |||
2409 | if (mddev->pers->quiesce == NULL) | ||
2410 | return -EINVAL; | ||
2411 | if (buf == e || (*e && *e != '\n')) | ||
2412 | return -EINVAL; | ||
2413 | if ((new <= mddev->suspend_lo && mddev->suspend_lo >= mddev->suspend_hi) || | ||
2414 | (new > mddev->suspend_lo && new > mddev->suspend_hi)) { | ||
2415 | mddev->suspend_hi = new; | ||
2416 | mddev->pers->quiesce(mddev, 1); | ||
2417 | mddev->pers->quiesce(mddev, 0); | ||
2418 | return len; | ||
2419 | } else | ||
2420 | return -EINVAL; | ||
2421 | } | ||
2422 | static struct md_sysfs_entry md_suspend_hi = | ||
2423 | __ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store); | ||
2424 | |||
2425 | |||
2307 | static struct attribute *md_default_attrs[] = { | 2426 | static struct attribute *md_default_attrs[] = { |
2308 | &md_level.attr, | 2427 | &md_level.attr, |
2309 | &md_raid_disks.attr, | 2428 | &md_raid_disks.attr, |
@@ -2321,6 +2440,8 @@ static struct attribute *md_redundancy_attrs[] = { | |||
2321 | &md_sync_max.attr, | 2440 | &md_sync_max.attr, |
2322 | &md_sync_speed.attr, | 2441 | &md_sync_speed.attr, |
2323 | &md_sync_completed.attr, | 2442 | &md_sync_completed.attr, |
2443 | &md_suspend_lo.attr, | ||
2444 | &md_suspend_hi.attr, | ||
2324 | NULL, | 2445 | NULL, |
2325 | }; | 2446 | }; |
2326 | static struct attribute_group md_redundancy_group = { | 2447 | static struct attribute_group md_redundancy_group = { |
@@ -2380,7 +2501,7 @@ int mdp_major = 0; | |||
2380 | 2501 | ||
2381 | static struct kobject *md_probe(dev_t dev, int *part, void *data) | 2502 | static struct kobject *md_probe(dev_t dev, int *part, void *data) |
2382 | { | 2503 | { |
2383 | static DECLARE_MUTEX(disks_sem); | 2504 | static DEFINE_MUTEX(disks_mutex); |
2384 | mddev_t *mddev = mddev_find(dev); | 2505 | mddev_t *mddev = mddev_find(dev); |
2385 | struct gendisk *disk; | 2506 | struct gendisk *disk; |
2386 | int partitioned = (MAJOR(dev) != MD_MAJOR); | 2507 | int partitioned = (MAJOR(dev) != MD_MAJOR); |
@@ -2390,15 +2511,15 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data) | |||
2390 | if (!mddev) | 2511 | if (!mddev) |
2391 | return NULL; | 2512 | return NULL; |
2392 | 2513 | ||
2393 | down(&disks_sem); | 2514 | mutex_lock(&disks_mutex); |
2394 | if (mddev->gendisk) { | 2515 | if (mddev->gendisk) { |
2395 | up(&disks_sem); | 2516 | mutex_unlock(&disks_mutex); |
2396 | mddev_put(mddev); | 2517 | mddev_put(mddev); |
2397 | return NULL; | 2518 | return NULL; |
2398 | } | 2519 | } |
2399 | disk = alloc_disk(1 << shift); | 2520 | disk = alloc_disk(1 << shift); |
2400 | if (!disk) { | 2521 | if (!disk) { |
2401 | up(&disks_sem); | 2522 | mutex_unlock(&disks_mutex); |
2402 | mddev_put(mddev); | 2523 | mddev_put(mddev); |
2403 | return NULL; | 2524 | return NULL; |
2404 | } | 2525 | } |
@@ -2416,7 +2537,7 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data) | |||
2416 | disk->queue = mddev->queue; | 2537 | disk->queue = mddev->queue; |
2417 | add_disk(disk); | 2538 | add_disk(disk); |
2418 | mddev->gendisk = disk; | 2539 | mddev->gendisk = disk; |
2419 | up(&disks_sem); | 2540 | mutex_unlock(&disks_mutex); |
2420 | mddev->kobj.parent = &disk->kobj; | 2541 | mddev->kobj.parent = &disk->kobj; |
2421 | mddev->kobj.k_name = NULL; | 2542 | mddev->kobj.k_name = NULL; |
2422 | snprintf(mddev->kobj.name, KOBJ_NAME_LEN, "%s", "md"); | 2543 | snprintf(mddev->kobj.name, KOBJ_NAME_LEN, "%s", "md"); |
@@ -2539,6 +2660,14 @@ static int do_md_run(mddev_t * mddev) | |||
2539 | mddev->level = pers->level; | 2660 | mddev->level = pers->level; |
2540 | strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); | 2661 | strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); |
2541 | 2662 | ||
2663 | if (mddev->reshape_position != MaxSector && | ||
2664 | pers->start_reshape == NULL) { | ||
2665 | /* This personality cannot handle reshaping... */ | ||
2666 | mddev->pers = NULL; | ||
2667 | module_put(pers->owner); | ||
2668 | return -EINVAL; | ||
2669 | } | ||
2670 | |||
2542 | mddev->recovery = 0; | 2671 | mddev->recovery = 0; |
2543 | mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */ | 2672 | mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */ |
2544 | mddev->barriers_work = 1; | 2673 | mddev->barriers_work = 1; |
@@ -2772,7 +2901,6 @@ static void autorun_array(mddev_t *mddev) | |||
2772 | */ | 2901 | */ |
2773 | static void autorun_devices(int part) | 2902 | static void autorun_devices(int part) |
2774 | { | 2903 | { |
2775 | struct list_head candidates; | ||
2776 | struct list_head *tmp; | 2904 | struct list_head *tmp; |
2777 | mdk_rdev_t *rdev0, *rdev; | 2905 | mdk_rdev_t *rdev0, *rdev; |
2778 | mddev_t *mddev; | 2906 | mddev_t *mddev; |
@@ -2781,6 +2909,7 @@ static void autorun_devices(int part) | |||
2781 | printk(KERN_INFO "md: autorun ...\n"); | 2909 | printk(KERN_INFO "md: autorun ...\n"); |
2782 | while (!list_empty(&pending_raid_disks)) { | 2910 | while (!list_empty(&pending_raid_disks)) { |
2783 | dev_t dev; | 2911 | dev_t dev; |
2912 | LIST_HEAD(candidates); | ||
2784 | rdev0 = list_entry(pending_raid_disks.next, | 2913 | rdev0 = list_entry(pending_raid_disks.next, |
2785 | mdk_rdev_t, same_set); | 2914 | mdk_rdev_t, same_set); |
2786 | 2915 | ||
@@ -3427,11 +3556,18 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info) | |||
3427 | mddev->default_bitmap_offset = MD_SB_BYTES >> 9; | 3556 | mddev->default_bitmap_offset = MD_SB_BYTES >> 9; |
3428 | mddev->bitmap_offset = 0; | 3557 | mddev->bitmap_offset = 0; |
3429 | 3558 | ||
3559 | mddev->reshape_position = MaxSector; | ||
3560 | |||
3430 | /* | 3561 | /* |
3431 | * Generate a 128 bit UUID | 3562 | * Generate a 128 bit UUID |
3432 | */ | 3563 | */ |
3433 | get_random_bytes(mddev->uuid, 16); | 3564 | get_random_bytes(mddev->uuid, 16); |
3434 | 3565 | ||
3566 | mddev->new_level = mddev->level; | ||
3567 | mddev->new_chunk = mddev->chunk_size; | ||
3568 | mddev->new_layout = mddev->layout; | ||
3569 | mddev->delta_disks = 0; | ||
3570 | |||
3435 | return 0; | 3571 | return 0; |
3436 | } | 3572 | } |
3437 | 3573 | ||
@@ -3440,6 +3576,7 @@ static int update_size(mddev_t *mddev, unsigned long size) | |||
3440 | mdk_rdev_t * rdev; | 3576 | mdk_rdev_t * rdev; |
3441 | int rv; | 3577 | int rv; |
3442 | struct list_head *tmp; | 3578 | struct list_head *tmp; |
3579 | int fit = (size == 0); | ||
3443 | 3580 | ||
3444 | if (mddev->pers->resize == NULL) | 3581 | if (mddev->pers->resize == NULL) |
3445 | return -EINVAL; | 3582 | return -EINVAL; |
@@ -3457,7 +3594,6 @@ static int update_size(mddev_t *mddev, unsigned long size) | |||
3457 | return -EBUSY; | 3594 | return -EBUSY; |
3458 | ITERATE_RDEV(mddev,rdev,tmp) { | 3595 | ITERATE_RDEV(mddev,rdev,tmp) { |
3459 | sector_t avail; | 3596 | sector_t avail; |
3460 | int fit = (size == 0); | ||
3461 | if (rdev->sb_offset > rdev->data_offset) | 3597 | if (rdev->sb_offset > rdev->data_offset) |
3462 | avail = (rdev->sb_offset*2) - rdev->data_offset; | 3598 | avail = (rdev->sb_offset*2) - rdev->data_offset; |
3463 | else | 3599 | else |
@@ -3487,14 +3623,16 @@ static int update_raid_disks(mddev_t *mddev, int raid_disks) | |||
3487 | { | 3623 | { |
3488 | int rv; | 3624 | int rv; |
3489 | /* change the number of raid disks */ | 3625 | /* change the number of raid disks */ |
3490 | if (mddev->pers->reshape == NULL) | 3626 | if (mddev->pers->check_reshape == NULL) |
3491 | return -EINVAL; | 3627 | return -EINVAL; |
3492 | if (raid_disks <= 0 || | 3628 | if (raid_disks <= 0 || |
3493 | raid_disks >= mddev->max_disks) | 3629 | raid_disks >= mddev->max_disks) |
3494 | return -EINVAL; | 3630 | return -EINVAL; |
3495 | if (mddev->sync_thread) | 3631 | if (mddev->sync_thread || mddev->reshape_position != MaxSector) |
3496 | return -EBUSY; | 3632 | return -EBUSY; |
3497 | rv = mddev->pers->reshape(mddev, raid_disks); | 3633 | mddev->delta_disks = raid_disks - mddev->raid_disks; |
3634 | |||
3635 | rv = mddev->pers->check_reshape(mddev); | ||
3498 | return rv; | 3636 | return rv; |
3499 | } | 3637 | } |
3500 | 3638 | ||
@@ -4041,7 +4179,10 @@ static void status_unused(struct seq_file *seq) | |||
4041 | 4179 | ||
4042 | static void status_resync(struct seq_file *seq, mddev_t * mddev) | 4180 | static void status_resync(struct seq_file *seq, mddev_t * mddev) |
4043 | { | 4181 | { |
4044 | unsigned long max_blocks, resync, res, dt, db, rt; | 4182 | sector_t max_blocks, resync, res; |
4183 | unsigned long dt, db, rt; | ||
4184 | int scale; | ||
4185 | unsigned int per_milli; | ||
4045 | 4186 | ||
4046 | resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active))/2; | 4187 | resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active))/2; |
4047 | 4188 | ||
@@ -4057,9 +4198,22 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev) | |||
4057 | MD_BUG(); | 4198 | MD_BUG(); |
4058 | return; | 4199 | return; |
4059 | } | 4200 | } |
4060 | res = (resync/1024)*1000/(max_blocks/1024 + 1); | 4201 | /* Pick 'scale' such that (resync>>scale)*1000 will fit |
4202 | * in a sector_t, and (max_blocks>>scale) will fit in a | ||
4203 | * u32, as those are the requirements for sector_div. | ||
4204 | * Thus 'scale' must be at least 10 | ||
4205 | */ | ||
4206 | scale = 10; | ||
4207 | if (sizeof(sector_t) > sizeof(unsigned long)) { | ||
4208 | while ( max_blocks/2 > (1ULL<<(scale+32))) | ||
4209 | scale++; | ||
4210 | } | ||
4211 | res = (resync>>scale)*1000; | ||
4212 | sector_div(res, (u32)((max_blocks>>scale)+1)); | ||
4213 | |||
4214 | per_milli = res; | ||
4061 | { | 4215 | { |
4062 | int i, x = res/50, y = 20-x; | 4216 | int i, x = per_milli/50, y = 20-x; |
4063 | seq_printf(seq, "["); | 4217 | seq_printf(seq, "["); |
4064 | for (i = 0; i < x; i++) | 4218 | for (i = 0; i < x; i++) |
4065 | seq_printf(seq, "="); | 4219 | seq_printf(seq, "="); |
@@ -4068,10 +4222,14 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev) | |||
4068 | seq_printf(seq, "."); | 4222 | seq_printf(seq, "."); |
4069 | seq_printf(seq, "] "); | 4223 | seq_printf(seq, "] "); |
4070 | } | 4224 | } |
4071 | seq_printf(seq, " %s =%3lu.%lu%% (%lu/%lu)", | 4225 | seq_printf(seq, " %s =%3u.%u%% (%llu/%llu)", |
4226 | (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)? | ||
4227 | "reshape" : | ||
4072 | (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ? | 4228 | (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ? |
4073 | "resync" : "recovery"), | 4229 | "resync" : "recovery")), |
4074 | res/10, res % 10, resync, max_blocks); | 4230 | per_milli/10, per_milli % 10, |
4231 | (unsigned long long) resync, | ||
4232 | (unsigned long long) max_blocks); | ||
4075 | 4233 | ||
4076 | /* | 4234 | /* |
4077 | * We do not want to overflow, so the order of operands and | 4235 | * We do not want to overflow, so the order of operands and |
@@ -4085,7 +4243,7 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev) | |||
4085 | dt = ((jiffies - mddev->resync_mark) / HZ); | 4243 | dt = ((jiffies - mddev->resync_mark) / HZ); |
4086 | if (!dt) dt++; | 4244 | if (!dt) dt++; |
4087 | db = resync - (mddev->resync_mark_cnt/2); | 4245 | db = resync - (mddev->resync_mark_cnt/2); |
4088 | rt = (dt * ((max_blocks-resync) / (db/100+1)))/100; | 4246 | rt = (dt * ((unsigned long)(max_blocks-resync) / (db/100+1)))/100; |
4089 | 4247 | ||
4090 | seq_printf(seq, " finish=%lu.%lumin", rt / 60, (rt % 60)/6); | 4248 | seq_printf(seq, " finish=%lu.%lumin", rt / 60, (rt % 60)/6); |
4091 | 4249 | ||
@@ -4442,7 +4600,7 @@ static DECLARE_WAIT_QUEUE_HEAD(resync_wait); | |||
4442 | 4600 | ||
4443 | #define SYNC_MARKS 10 | 4601 | #define SYNC_MARKS 10 |
4444 | #define SYNC_MARK_STEP (3*HZ) | 4602 | #define SYNC_MARK_STEP (3*HZ) |
4445 | static void md_do_sync(mddev_t *mddev) | 4603 | void md_do_sync(mddev_t *mddev) |
4446 | { | 4604 | { |
4447 | mddev_t *mddev2; | 4605 | mddev_t *mddev2; |
4448 | unsigned int currspeed = 0, | 4606 | unsigned int currspeed = 0, |
@@ -4522,7 +4680,9 @@ static void md_do_sync(mddev_t *mddev) | |||
4522 | */ | 4680 | */ |
4523 | max_sectors = mddev->resync_max_sectors; | 4681 | max_sectors = mddev->resync_max_sectors; |
4524 | mddev->resync_mismatches = 0; | 4682 | mddev->resync_mismatches = 0; |
4525 | } else | 4683 | } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) |
4684 | max_sectors = mddev->size << 1; | ||
4685 | else | ||
4526 | /* recovery follows the physical size of devices */ | 4686 | /* recovery follows the physical size of devices */ |
4527 | max_sectors = mddev->size << 1; | 4687 | max_sectors = mddev->size << 1; |
4528 | 4688 | ||
@@ -4658,6 +4818,8 @@ static void md_do_sync(mddev_t *mddev) | |||
4658 | mddev->pers->sync_request(mddev, max_sectors, &skipped, 1); | 4818 | mddev->pers->sync_request(mddev, max_sectors, &skipped, 1); |
4659 | 4819 | ||
4660 | if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) && | 4820 | if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) && |
4821 | test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && | ||
4822 | !test_bit(MD_RECOVERY_CHECK, &mddev->recovery) && | ||
4661 | mddev->curr_resync > 2 && | 4823 | mddev->curr_resync > 2 && |
4662 | mddev->curr_resync >= mddev->recovery_cp) { | 4824 | mddev->curr_resync >= mddev->recovery_cp) { |
4663 | if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { | 4825 | if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { |
@@ -4675,6 +4837,7 @@ static void md_do_sync(mddev_t *mddev) | |||
4675 | set_bit(MD_RECOVERY_DONE, &mddev->recovery); | 4837 | set_bit(MD_RECOVERY_DONE, &mddev->recovery); |
4676 | md_wakeup_thread(mddev->thread); | 4838 | md_wakeup_thread(mddev->thread); |
4677 | } | 4839 | } |
4840 | EXPORT_SYMBOL_GPL(md_do_sync); | ||
4678 | 4841 | ||
4679 | 4842 | ||
4680 | /* | 4843 | /* |
@@ -4730,7 +4893,7 @@ void md_check_recovery(mddev_t *mddev) | |||
4730 | )) | 4893 | )) |
4731 | return; | 4894 | return; |
4732 | 4895 | ||
4733 | if (mddev_trylock(mddev)==0) { | 4896 | if (mddev_trylock(mddev)) { |
4734 | int spares =0; | 4897 | int spares =0; |
4735 | 4898 | ||
4736 | spin_lock_irq(&mddev->write_lock); | 4899 | spin_lock_irq(&mddev->write_lock); |
@@ -4866,7 +5029,7 @@ static int md_notify_reboot(struct notifier_block *this, | |||
4866 | printk(KERN_INFO "md: stopping all md devices.\n"); | 5029 | printk(KERN_INFO "md: stopping all md devices.\n"); |
4867 | 5030 | ||
4868 | ITERATE_MDDEV(mddev,tmp) | 5031 | ITERATE_MDDEV(mddev,tmp) |
4869 | if (mddev_trylock(mddev)==0) | 5032 | if (mddev_trylock(mddev)) |
4870 | do_md_stop (mddev, 1); | 5033 | do_md_stop (mddev, 1); |
4871 | /* | 5034 | /* |
4872 | * certain more exotic SCSI devices are known to be | 5035 | * certain more exotic SCSI devices are known to be |