diff options
Diffstat (limited to 'drivers/md/md.c')
| -rw-r--r-- | drivers/md/md.c | 235 |
1 files changed, 199 insertions, 36 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index 5ed2228745cb..039e071c1007 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
| @@ -43,6 +43,7 @@ | |||
| 43 | #include <linux/buffer_head.h> /* for invalidate_bdev */ | 43 | #include <linux/buffer_head.h> /* for invalidate_bdev */ |
| 44 | #include <linux/suspend.h> | 44 | #include <linux/suspend.h> |
| 45 | #include <linux/poll.h> | 45 | #include <linux/poll.h> |
| 46 | #include <linux/mutex.h> | ||
| 46 | 47 | ||
| 47 | #include <linux/init.h> | 48 | #include <linux/init.h> |
| 48 | 49 | ||
| @@ -158,11 +159,12 @@ static int start_readonly; | |||
| 158 | */ | 159 | */ |
| 159 | static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters); | 160 | static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters); |
| 160 | static atomic_t md_event_count; | 161 | static atomic_t md_event_count; |
| 161 | static void md_new_event(mddev_t *mddev) | 162 | void md_new_event(mddev_t *mddev) |
| 162 | { | 163 | { |
| 163 | atomic_inc(&md_event_count); | 164 | atomic_inc(&md_event_count); |
| 164 | wake_up(&md_event_waiters); | 165 | wake_up(&md_event_waiters); |
| 165 | } | 166 | } |
| 167 | EXPORT_SYMBOL_GPL(md_new_event); | ||
| 166 | 168 | ||
| 167 | /* | 169 | /* |
| 168 | * Enables to iterate over all existing md arrays | 170 | * Enables to iterate over all existing md arrays |
| @@ -253,7 +255,7 @@ static mddev_t * mddev_find(dev_t unit) | |||
| 253 | else | 255 | else |
| 254 | new->md_minor = MINOR(unit) >> MdpMinorShift; | 256 | new->md_minor = MINOR(unit) >> MdpMinorShift; |
| 255 | 257 | ||
| 256 | init_MUTEX(&new->reconfig_sem); | 258 | mutex_init(&new->reconfig_mutex); |
| 257 | INIT_LIST_HEAD(&new->disks); | 259 | INIT_LIST_HEAD(&new->disks); |
| 258 | INIT_LIST_HEAD(&new->all_mddevs); | 260 | INIT_LIST_HEAD(&new->all_mddevs); |
| 259 | init_timer(&new->safemode_timer); | 261 | init_timer(&new->safemode_timer); |
| @@ -266,6 +268,7 @@ static mddev_t * mddev_find(dev_t unit) | |||
| 266 | kfree(new); | 268 | kfree(new); |
| 267 | return NULL; | 269 | return NULL; |
| 268 | } | 270 | } |
| 271 | set_bit(QUEUE_FLAG_CLUSTER, &new->queue->queue_flags); | ||
| 269 | 272 | ||
| 270 | blk_queue_make_request(new->queue, md_fail_request); | 273 | blk_queue_make_request(new->queue, md_fail_request); |
| 271 | 274 | ||
| @@ -274,22 +277,22 @@ static mddev_t * mddev_find(dev_t unit) | |||
| 274 | 277 | ||
| 275 | static inline int mddev_lock(mddev_t * mddev) | 278 | static inline int mddev_lock(mddev_t * mddev) |
| 276 | { | 279 | { |
| 277 | return down_interruptible(&mddev->reconfig_sem); | 280 | return mutex_lock_interruptible(&mddev->reconfig_mutex); |
| 278 | } | 281 | } |
| 279 | 282 | ||
| 280 | static inline void mddev_lock_uninterruptible(mddev_t * mddev) | 283 | static inline void mddev_lock_uninterruptible(mddev_t * mddev) |
| 281 | { | 284 | { |
| 282 | down(&mddev->reconfig_sem); | 285 | mutex_lock(&mddev->reconfig_mutex); |
| 283 | } | 286 | } |
| 284 | 287 | ||
| 285 | static inline int mddev_trylock(mddev_t * mddev) | 288 | static inline int mddev_trylock(mddev_t * mddev) |
| 286 | { | 289 | { |
| 287 | return down_trylock(&mddev->reconfig_sem); | 290 | return mutex_trylock(&mddev->reconfig_mutex); |
| 288 | } | 291 | } |
| 289 | 292 | ||
| 290 | static inline void mddev_unlock(mddev_t * mddev) | 293 | static inline void mddev_unlock(mddev_t * mddev) |
| 291 | { | 294 | { |
| 292 | up(&mddev->reconfig_sem); | 295 | mutex_unlock(&mddev->reconfig_mutex); |
| 293 | 296 | ||
| 294 | md_wakeup_thread(mddev->thread); | 297 | md_wakeup_thread(mddev->thread); |
| 295 | } | 298 | } |
| @@ -660,7 +663,8 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version | |||
| 660 | } | 663 | } |
| 661 | 664 | ||
| 662 | if (sb->major_version != 0 || | 665 | if (sb->major_version != 0 || |
| 663 | sb->minor_version != 90) { | 666 | sb->minor_version < 90 || |
| 667 | sb->minor_version > 91) { | ||
| 664 | printk(KERN_WARNING "Bad version number %d.%d on %s\n", | 668 | printk(KERN_WARNING "Bad version number %d.%d on %s\n", |
| 665 | sb->major_version, sb->minor_version, | 669 | sb->major_version, sb->minor_version, |
| 666 | b); | 670 | b); |
| @@ -745,6 +749,20 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
| 745 | mddev->bitmap_offset = 0; | 749 | mddev->bitmap_offset = 0; |
| 746 | mddev->default_bitmap_offset = MD_SB_BYTES >> 9; | 750 | mddev->default_bitmap_offset = MD_SB_BYTES >> 9; |
| 747 | 751 | ||
| 752 | if (mddev->minor_version >= 91) { | ||
| 753 | mddev->reshape_position = sb->reshape_position; | ||
| 754 | mddev->delta_disks = sb->delta_disks; | ||
| 755 | mddev->new_level = sb->new_level; | ||
| 756 | mddev->new_layout = sb->new_layout; | ||
| 757 | mddev->new_chunk = sb->new_chunk; | ||
| 758 | } else { | ||
| 759 | mddev->reshape_position = MaxSector; | ||
| 760 | mddev->delta_disks = 0; | ||
| 761 | mddev->new_level = mddev->level; | ||
| 762 | mddev->new_layout = mddev->layout; | ||
| 763 | mddev->new_chunk = mddev->chunk_size; | ||
| 764 | } | ||
| 765 | |||
| 748 | if (sb->state & (1<<MD_SB_CLEAN)) | 766 | if (sb->state & (1<<MD_SB_CLEAN)) |
| 749 | mddev->recovery_cp = MaxSector; | 767 | mddev->recovery_cp = MaxSector; |
| 750 | else { | 768 | else { |
| @@ -764,7 +782,8 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
| 764 | 782 | ||
| 765 | if (sb->state & (1<<MD_SB_BITMAP_PRESENT) && | 783 | if (sb->state & (1<<MD_SB_BITMAP_PRESENT) && |
| 766 | mddev->bitmap_file == NULL) { | 784 | mddev->bitmap_file == NULL) { |
| 767 | if (mddev->level != 1 && mddev->level != 5 && mddev->level != 6 | 785 | if (mddev->level != 1 && mddev->level != 4 |
| 786 | && mddev->level != 5 && mddev->level != 6 | ||
| 768 | && mddev->level != 10) { | 787 | && mddev->level != 10) { |
| 769 | /* FIXME use a better test */ | 788 | /* FIXME use a better test */ |
| 770 | printk(KERN_WARNING "md: bitmaps not supported for this level.\n"); | 789 | printk(KERN_WARNING "md: bitmaps not supported for this level.\n"); |
| @@ -838,7 +857,6 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
| 838 | 857 | ||
| 839 | sb->md_magic = MD_SB_MAGIC; | 858 | sb->md_magic = MD_SB_MAGIC; |
| 840 | sb->major_version = mddev->major_version; | 859 | sb->major_version = mddev->major_version; |
| 841 | sb->minor_version = mddev->minor_version; | ||
| 842 | sb->patch_version = mddev->patch_version; | 860 | sb->patch_version = mddev->patch_version; |
| 843 | sb->gvalid_words = 0; /* ignored */ | 861 | sb->gvalid_words = 0; /* ignored */ |
| 844 | memcpy(&sb->set_uuid0, mddev->uuid+0, 4); | 862 | memcpy(&sb->set_uuid0, mddev->uuid+0, 4); |
| @@ -857,6 +875,17 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
| 857 | sb->events_hi = (mddev->events>>32); | 875 | sb->events_hi = (mddev->events>>32); |
| 858 | sb->events_lo = (u32)mddev->events; | 876 | sb->events_lo = (u32)mddev->events; |
| 859 | 877 | ||
| 878 | if (mddev->reshape_position == MaxSector) | ||
| 879 | sb->minor_version = 90; | ||
| 880 | else { | ||
| 881 | sb->minor_version = 91; | ||
| 882 | sb->reshape_position = mddev->reshape_position; | ||
| 883 | sb->new_level = mddev->new_level; | ||
| 884 | sb->delta_disks = mddev->delta_disks; | ||
| 885 | sb->new_layout = mddev->new_layout; | ||
| 886 | sb->new_chunk = mddev->new_chunk; | ||
| 887 | } | ||
| 888 | mddev->minor_version = sb->minor_version; | ||
| 860 | if (mddev->in_sync) | 889 | if (mddev->in_sync) |
| 861 | { | 890 | { |
| 862 | sb->recovery_cp = mddev->recovery_cp; | 891 | sb->recovery_cp = mddev->recovery_cp; |
| @@ -893,10 +922,9 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
| 893 | d->raid_disk = rdev2->raid_disk; | 922 | d->raid_disk = rdev2->raid_disk; |
| 894 | else | 923 | else |
| 895 | d->raid_disk = rdev2->desc_nr; /* compatibility */ | 924 | d->raid_disk = rdev2->desc_nr; /* compatibility */ |
| 896 | if (test_bit(Faulty, &rdev2->flags)) { | 925 | if (test_bit(Faulty, &rdev2->flags)) |
| 897 | d->state = (1<<MD_DISK_FAULTY); | 926 | d->state = (1<<MD_DISK_FAULTY); |
| 898 | failed++; | 927 | else if (test_bit(In_sync, &rdev2->flags)) { |
| 899 | } else if (test_bit(In_sync, &rdev2->flags)) { | ||
| 900 | d->state = (1<<MD_DISK_ACTIVE); | 928 | d->state = (1<<MD_DISK_ACTIVE); |
| 901 | d->state |= (1<<MD_DISK_SYNC); | 929 | d->state |= (1<<MD_DISK_SYNC); |
| 902 | active++; | 930 | active++; |
| @@ -1102,6 +1130,20 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
| 1102 | } | 1130 | } |
| 1103 | mddev->bitmap_offset = (__s32)le32_to_cpu(sb->bitmap_offset); | 1131 | mddev->bitmap_offset = (__s32)le32_to_cpu(sb->bitmap_offset); |
| 1104 | } | 1132 | } |
| 1133 | if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) { | ||
| 1134 | mddev->reshape_position = le64_to_cpu(sb->reshape_position); | ||
| 1135 | mddev->delta_disks = le32_to_cpu(sb->delta_disks); | ||
| 1136 | mddev->new_level = le32_to_cpu(sb->new_level); | ||
| 1137 | mddev->new_layout = le32_to_cpu(sb->new_layout); | ||
| 1138 | mddev->new_chunk = le32_to_cpu(sb->new_chunk)<<9; | ||
| 1139 | } else { | ||
| 1140 | mddev->reshape_position = MaxSector; | ||
| 1141 | mddev->delta_disks = 0; | ||
| 1142 | mddev->new_level = mddev->level; | ||
| 1143 | mddev->new_layout = mddev->layout; | ||
| 1144 | mddev->new_chunk = mddev->chunk_size; | ||
| 1145 | } | ||
| 1146 | |||
| 1105 | } else if (mddev->pers == NULL) { | 1147 | } else if (mddev->pers == NULL) { |
| 1106 | /* Insist of good event counter while assembling */ | 1148 | /* Insist of good event counter while assembling */ |
| 1107 | __u64 ev1 = le64_to_cpu(sb->events); | 1149 | __u64 ev1 = le64_to_cpu(sb->events); |
| @@ -1173,6 +1215,14 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
| 1173 | sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset); | 1215 | sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset); |
| 1174 | sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET); | 1216 | sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET); |
| 1175 | } | 1217 | } |
| 1218 | if (mddev->reshape_position != MaxSector) { | ||
| 1219 | sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE); | ||
| 1220 | sb->reshape_position = cpu_to_le64(mddev->reshape_position); | ||
| 1221 | sb->new_layout = cpu_to_le32(mddev->new_layout); | ||
| 1222 | sb->delta_disks = cpu_to_le32(mddev->delta_disks); | ||
| 1223 | sb->new_level = cpu_to_le32(mddev->new_level); | ||
| 1224 | sb->new_chunk = cpu_to_le32(mddev->new_chunk>>9); | ||
| 1225 | } | ||
| 1176 | 1226 | ||
| 1177 | max_dev = 0; | 1227 | max_dev = 0; |
| 1178 | ITERATE_RDEV(mddev,rdev2,tmp) | 1228 | ITERATE_RDEV(mddev,rdev2,tmp) |
| @@ -1301,6 +1351,7 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) | |||
| 1301 | else | 1351 | else |
| 1302 | ko = &rdev->bdev->bd_disk->kobj; | 1352 | ko = &rdev->bdev->bd_disk->kobj; |
| 1303 | sysfs_create_link(&rdev->kobj, ko, "block"); | 1353 | sysfs_create_link(&rdev->kobj, ko, "block"); |
| 1354 | bd_claim_by_disk(rdev->bdev, rdev, mddev->gendisk); | ||
| 1304 | return 0; | 1355 | return 0; |
| 1305 | } | 1356 | } |
| 1306 | 1357 | ||
| @@ -1311,6 +1362,7 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev) | |||
| 1311 | MD_BUG(); | 1362 | MD_BUG(); |
| 1312 | return; | 1363 | return; |
| 1313 | } | 1364 | } |
| 1365 | bd_release_from_disk(rdev->bdev, rdev->mddev->gendisk); | ||
| 1314 | list_del_init(&rdev->same_set); | 1366 | list_del_init(&rdev->same_set); |
| 1315 | printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b)); | 1367 | printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b)); |
| 1316 | rdev->mddev = NULL; | 1368 | rdev->mddev = NULL; |
| @@ -1493,7 +1545,7 @@ static void sync_sbs(mddev_t * mddev) | |||
| 1493 | } | 1545 | } |
| 1494 | } | 1546 | } |
| 1495 | 1547 | ||
| 1496 | static void md_update_sb(mddev_t * mddev) | 1548 | void md_update_sb(mddev_t * mddev) |
| 1497 | { | 1549 | { |
| 1498 | int err; | 1550 | int err; |
| 1499 | struct list_head *tmp; | 1551 | struct list_head *tmp; |
| @@ -1570,6 +1622,7 @@ repeat: | |||
| 1570 | wake_up(&mddev->sb_wait); | 1622 | wake_up(&mddev->sb_wait); |
| 1571 | 1623 | ||
| 1572 | } | 1624 | } |
| 1625 | EXPORT_SYMBOL_GPL(md_update_sb); | ||
| 1573 | 1626 | ||
| 1574 | /* words written to sysfs files may, or my not, be \n terminated. | 1627 | /* words written to sysfs files may, or my not, be \n terminated. |
| 1575 | * We want to accept with case. For this we use cmd_match. | 1628 | * We want to accept with case. For this we use cmd_match. |
| @@ -2162,7 +2215,9 @@ action_show(mddev_t *mddev, char *page) | |||
| 2162 | char *type = "idle"; | 2215 | char *type = "idle"; |
| 2163 | if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || | 2216 | if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || |
| 2164 | test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) { | 2217 | test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) { |
| 2165 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { | 2218 | if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) |
| 2219 | type = "reshape"; | ||
| 2220 | else if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { | ||
| 2166 | if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) | 2221 | if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) |
| 2167 | type = "resync"; | 2222 | type = "resync"; |
| 2168 | else if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) | 2223 | else if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) |
| @@ -2193,7 +2248,14 @@ action_store(mddev_t *mddev, const char *page, size_t len) | |||
| 2193 | return -EBUSY; | 2248 | return -EBUSY; |
| 2194 | else if (cmd_match(page, "resync") || cmd_match(page, "recover")) | 2249 | else if (cmd_match(page, "resync") || cmd_match(page, "recover")) |
| 2195 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 2250 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
| 2196 | else { | 2251 | else if (cmd_match(page, "reshape")) { |
| 2252 | int err; | ||
| 2253 | if (mddev->pers->start_reshape == NULL) | ||
| 2254 | return -EINVAL; | ||
| 2255 | err = mddev->pers->start_reshape(mddev); | ||
| 2256 | if (err) | ||
| 2257 | return err; | ||
| 2258 | } else { | ||
| 2197 | if (cmd_match(page, "check")) | 2259 | if (cmd_match(page, "check")) |
| 2198 | set_bit(MD_RECOVERY_CHECK, &mddev->recovery); | 2260 | set_bit(MD_RECOVERY_CHECK, &mddev->recovery); |
| 2199 | else if (cmd_match(page, "repair")) | 2261 | else if (cmd_match(page, "repair")) |
| @@ -2304,6 +2366,63 @@ sync_completed_show(mddev_t *mddev, char *page) | |||
| 2304 | static struct md_sysfs_entry | 2366 | static struct md_sysfs_entry |
| 2305 | md_sync_completed = __ATTR_RO(sync_completed); | 2367 | md_sync_completed = __ATTR_RO(sync_completed); |
| 2306 | 2368 | ||
| 2369 | static ssize_t | ||
| 2370 | suspend_lo_show(mddev_t *mddev, char *page) | ||
| 2371 | { | ||
| 2372 | return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo); | ||
| 2373 | } | ||
| 2374 | |||
| 2375 | static ssize_t | ||
| 2376 | suspend_lo_store(mddev_t *mddev, const char *buf, size_t len) | ||
| 2377 | { | ||
| 2378 | char *e; | ||
| 2379 | unsigned long long new = simple_strtoull(buf, &e, 10); | ||
| 2380 | |||
| 2381 | if (mddev->pers->quiesce == NULL) | ||
| 2382 | return -EINVAL; | ||
| 2383 | if (buf == e || (*e && *e != '\n')) | ||
| 2384 | return -EINVAL; | ||
| 2385 | if (new >= mddev->suspend_hi || | ||
| 2386 | (new > mddev->suspend_lo && new < mddev->suspend_hi)) { | ||
| 2387 | mddev->suspend_lo = new; | ||
| 2388 | mddev->pers->quiesce(mddev, 2); | ||
| 2389 | return len; | ||
| 2390 | } else | ||
| 2391 | return -EINVAL; | ||
| 2392 | } | ||
| 2393 | static struct md_sysfs_entry md_suspend_lo = | ||
| 2394 | __ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store); | ||
| 2395 | |||
| 2396 | |||
| 2397 | static ssize_t | ||
| 2398 | suspend_hi_show(mddev_t *mddev, char *page) | ||
| 2399 | { | ||
| 2400 | return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi); | ||
| 2401 | } | ||
| 2402 | |||
| 2403 | static ssize_t | ||
| 2404 | suspend_hi_store(mddev_t *mddev, const char *buf, size_t len) | ||
| 2405 | { | ||
| 2406 | char *e; | ||
| 2407 | unsigned long long new = simple_strtoull(buf, &e, 10); | ||
| 2408 | |||
| 2409 | if (mddev->pers->quiesce == NULL) | ||
| 2410 | return -EINVAL; | ||
| 2411 | if (buf == e || (*e && *e != '\n')) | ||
| 2412 | return -EINVAL; | ||
| 2413 | if ((new <= mddev->suspend_lo && mddev->suspend_lo >= mddev->suspend_hi) || | ||
| 2414 | (new > mddev->suspend_lo && new > mddev->suspend_hi)) { | ||
| 2415 | mddev->suspend_hi = new; | ||
| 2416 | mddev->pers->quiesce(mddev, 1); | ||
| 2417 | mddev->pers->quiesce(mddev, 0); | ||
| 2418 | return len; | ||
| 2419 | } else | ||
| 2420 | return -EINVAL; | ||
| 2421 | } | ||
| 2422 | static struct md_sysfs_entry md_suspend_hi = | ||
| 2423 | __ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store); | ||
| 2424 | |||
| 2425 | |||
| 2307 | static struct attribute *md_default_attrs[] = { | 2426 | static struct attribute *md_default_attrs[] = { |
| 2308 | &md_level.attr, | 2427 | &md_level.attr, |
| 2309 | &md_raid_disks.attr, | 2428 | &md_raid_disks.attr, |
| @@ -2321,6 +2440,8 @@ static struct attribute *md_redundancy_attrs[] = { | |||
| 2321 | &md_sync_max.attr, | 2440 | &md_sync_max.attr, |
| 2322 | &md_sync_speed.attr, | 2441 | &md_sync_speed.attr, |
| 2323 | &md_sync_completed.attr, | 2442 | &md_sync_completed.attr, |
| 2443 | &md_suspend_lo.attr, | ||
| 2444 | &md_suspend_hi.attr, | ||
| 2324 | NULL, | 2445 | NULL, |
| 2325 | }; | 2446 | }; |
| 2326 | static struct attribute_group md_redundancy_group = { | 2447 | static struct attribute_group md_redundancy_group = { |
| @@ -2380,7 +2501,7 @@ int mdp_major = 0; | |||
| 2380 | 2501 | ||
| 2381 | static struct kobject *md_probe(dev_t dev, int *part, void *data) | 2502 | static struct kobject *md_probe(dev_t dev, int *part, void *data) |
| 2382 | { | 2503 | { |
| 2383 | static DECLARE_MUTEX(disks_sem); | 2504 | static DEFINE_MUTEX(disks_mutex); |
| 2384 | mddev_t *mddev = mddev_find(dev); | 2505 | mddev_t *mddev = mddev_find(dev); |
| 2385 | struct gendisk *disk; | 2506 | struct gendisk *disk; |
| 2386 | int partitioned = (MAJOR(dev) != MD_MAJOR); | 2507 | int partitioned = (MAJOR(dev) != MD_MAJOR); |
| @@ -2390,15 +2511,15 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data) | |||
| 2390 | if (!mddev) | 2511 | if (!mddev) |
| 2391 | return NULL; | 2512 | return NULL; |
| 2392 | 2513 | ||
| 2393 | down(&disks_sem); | 2514 | mutex_lock(&disks_mutex); |
| 2394 | if (mddev->gendisk) { | 2515 | if (mddev->gendisk) { |
| 2395 | up(&disks_sem); | 2516 | mutex_unlock(&disks_mutex); |
| 2396 | mddev_put(mddev); | 2517 | mddev_put(mddev); |
| 2397 | return NULL; | 2518 | return NULL; |
| 2398 | } | 2519 | } |
| 2399 | disk = alloc_disk(1 << shift); | 2520 | disk = alloc_disk(1 << shift); |
| 2400 | if (!disk) { | 2521 | if (!disk) { |
| 2401 | up(&disks_sem); | 2522 | mutex_unlock(&disks_mutex); |
| 2402 | mddev_put(mddev); | 2523 | mddev_put(mddev); |
| 2403 | return NULL; | 2524 | return NULL; |
| 2404 | } | 2525 | } |
| @@ -2416,7 +2537,7 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data) | |||
| 2416 | disk->queue = mddev->queue; | 2537 | disk->queue = mddev->queue; |
| 2417 | add_disk(disk); | 2538 | add_disk(disk); |
| 2418 | mddev->gendisk = disk; | 2539 | mddev->gendisk = disk; |
| 2419 | up(&disks_sem); | 2540 | mutex_unlock(&disks_mutex); |
| 2420 | mddev->kobj.parent = &disk->kobj; | 2541 | mddev->kobj.parent = &disk->kobj; |
| 2421 | mddev->kobj.k_name = NULL; | 2542 | mddev->kobj.k_name = NULL; |
| 2422 | snprintf(mddev->kobj.name, KOBJ_NAME_LEN, "%s", "md"); | 2543 | snprintf(mddev->kobj.name, KOBJ_NAME_LEN, "%s", "md"); |
| @@ -2539,6 +2660,14 @@ static int do_md_run(mddev_t * mddev) | |||
| 2539 | mddev->level = pers->level; | 2660 | mddev->level = pers->level; |
| 2540 | strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); | 2661 | strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); |
| 2541 | 2662 | ||
| 2663 | if (mddev->reshape_position != MaxSector && | ||
| 2664 | pers->start_reshape == NULL) { | ||
| 2665 | /* This personality cannot handle reshaping... */ | ||
| 2666 | mddev->pers = NULL; | ||
| 2667 | module_put(pers->owner); | ||
| 2668 | return -EINVAL; | ||
| 2669 | } | ||
| 2670 | |||
| 2542 | mddev->recovery = 0; | 2671 | mddev->recovery = 0; |
| 2543 | mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */ | 2672 | mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */ |
| 2544 | mddev->barriers_work = 1; | 2673 | mddev->barriers_work = 1; |
| @@ -2772,7 +2901,6 @@ static void autorun_array(mddev_t *mddev) | |||
| 2772 | */ | 2901 | */ |
| 2773 | static void autorun_devices(int part) | 2902 | static void autorun_devices(int part) |
| 2774 | { | 2903 | { |
| 2775 | struct list_head candidates; | ||
| 2776 | struct list_head *tmp; | 2904 | struct list_head *tmp; |
| 2777 | mdk_rdev_t *rdev0, *rdev; | 2905 | mdk_rdev_t *rdev0, *rdev; |
| 2778 | mddev_t *mddev; | 2906 | mddev_t *mddev; |
| @@ -2781,6 +2909,7 @@ static void autorun_devices(int part) | |||
| 2781 | printk(KERN_INFO "md: autorun ...\n"); | 2909 | printk(KERN_INFO "md: autorun ...\n"); |
| 2782 | while (!list_empty(&pending_raid_disks)) { | 2910 | while (!list_empty(&pending_raid_disks)) { |
| 2783 | dev_t dev; | 2911 | dev_t dev; |
| 2912 | LIST_HEAD(candidates); | ||
| 2784 | rdev0 = list_entry(pending_raid_disks.next, | 2913 | rdev0 = list_entry(pending_raid_disks.next, |
| 2785 | mdk_rdev_t, same_set); | 2914 | mdk_rdev_t, same_set); |
| 2786 | 2915 | ||
| @@ -3427,11 +3556,18 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info) | |||
| 3427 | mddev->default_bitmap_offset = MD_SB_BYTES >> 9; | 3556 | mddev->default_bitmap_offset = MD_SB_BYTES >> 9; |
| 3428 | mddev->bitmap_offset = 0; | 3557 | mddev->bitmap_offset = 0; |
| 3429 | 3558 | ||
| 3559 | mddev->reshape_position = MaxSector; | ||
| 3560 | |||
| 3430 | /* | 3561 | /* |
| 3431 | * Generate a 128 bit UUID | 3562 | * Generate a 128 bit UUID |
| 3432 | */ | 3563 | */ |
| 3433 | get_random_bytes(mddev->uuid, 16); | 3564 | get_random_bytes(mddev->uuid, 16); |
| 3434 | 3565 | ||
| 3566 | mddev->new_level = mddev->level; | ||
| 3567 | mddev->new_chunk = mddev->chunk_size; | ||
| 3568 | mddev->new_layout = mddev->layout; | ||
| 3569 | mddev->delta_disks = 0; | ||
| 3570 | |||
| 3435 | return 0; | 3571 | return 0; |
| 3436 | } | 3572 | } |
| 3437 | 3573 | ||
| @@ -3440,6 +3576,7 @@ static int update_size(mddev_t *mddev, unsigned long size) | |||
| 3440 | mdk_rdev_t * rdev; | 3576 | mdk_rdev_t * rdev; |
| 3441 | int rv; | 3577 | int rv; |
| 3442 | struct list_head *tmp; | 3578 | struct list_head *tmp; |
| 3579 | int fit = (size == 0); | ||
| 3443 | 3580 | ||
| 3444 | if (mddev->pers->resize == NULL) | 3581 | if (mddev->pers->resize == NULL) |
| 3445 | return -EINVAL; | 3582 | return -EINVAL; |
| @@ -3457,7 +3594,6 @@ static int update_size(mddev_t *mddev, unsigned long size) | |||
| 3457 | return -EBUSY; | 3594 | return -EBUSY; |
| 3458 | ITERATE_RDEV(mddev,rdev,tmp) { | 3595 | ITERATE_RDEV(mddev,rdev,tmp) { |
| 3459 | sector_t avail; | 3596 | sector_t avail; |
| 3460 | int fit = (size == 0); | ||
| 3461 | if (rdev->sb_offset > rdev->data_offset) | 3597 | if (rdev->sb_offset > rdev->data_offset) |
| 3462 | avail = (rdev->sb_offset*2) - rdev->data_offset; | 3598 | avail = (rdev->sb_offset*2) - rdev->data_offset; |
| 3463 | else | 3599 | else |
| @@ -3487,14 +3623,16 @@ static int update_raid_disks(mddev_t *mddev, int raid_disks) | |||
| 3487 | { | 3623 | { |
| 3488 | int rv; | 3624 | int rv; |
| 3489 | /* change the number of raid disks */ | 3625 | /* change the number of raid disks */ |
| 3490 | if (mddev->pers->reshape == NULL) | 3626 | if (mddev->pers->check_reshape == NULL) |
| 3491 | return -EINVAL; | 3627 | return -EINVAL; |
| 3492 | if (raid_disks <= 0 || | 3628 | if (raid_disks <= 0 || |
| 3493 | raid_disks >= mddev->max_disks) | 3629 | raid_disks >= mddev->max_disks) |
| 3494 | return -EINVAL; | 3630 | return -EINVAL; |
| 3495 | if (mddev->sync_thread) | 3631 | if (mddev->sync_thread || mddev->reshape_position != MaxSector) |
| 3496 | return -EBUSY; | 3632 | return -EBUSY; |
| 3497 | rv = mddev->pers->reshape(mddev, raid_disks); | 3633 | mddev->delta_disks = raid_disks - mddev->raid_disks; |
| 3634 | |||
| 3635 | rv = mddev->pers->check_reshape(mddev); | ||
| 3498 | return rv; | 3636 | return rv; |
| 3499 | } | 3637 | } |
| 3500 | 3638 | ||
| @@ -4041,7 +4179,10 @@ static void status_unused(struct seq_file *seq) | |||
| 4041 | 4179 | ||
| 4042 | static void status_resync(struct seq_file *seq, mddev_t * mddev) | 4180 | static void status_resync(struct seq_file *seq, mddev_t * mddev) |
| 4043 | { | 4181 | { |
| 4044 | unsigned long max_blocks, resync, res, dt, db, rt; | 4182 | sector_t max_blocks, resync, res; |
| 4183 | unsigned long dt, db, rt; | ||
| 4184 | int scale; | ||
| 4185 | unsigned int per_milli; | ||
| 4045 | 4186 | ||
| 4046 | resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active))/2; | 4187 | resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active))/2; |
| 4047 | 4188 | ||
| @@ -4057,9 +4198,22 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev) | |||
| 4057 | MD_BUG(); | 4198 | MD_BUG(); |
| 4058 | return; | 4199 | return; |
| 4059 | } | 4200 | } |
| 4060 | res = (resync/1024)*1000/(max_blocks/1024 + 1); | 4201 | /* Pick 'scale' such that (resync>>scale)*1000 will fit |
| 4202 | * in a sector_t, and (max_blocks>>scale) will fit in a | ||
| 4203 | * u32, as those are the requirements for sector_div. | ||
| 4204 | * Thus 'scale' must be at least 10 | ||
| 4205 | */ | ||
| 4206 | scale = 10; | ||
| 4207 | if (sizeof(sector_t) > sizeof(unsigned long)) { | ||
| 4208 | while ( max_blocks/2 > (1ULL<<(scale+32))) | ||
| 4209 | scale++; | ||
| 4210 | } | ||
| 4211 | res = (resync>>scale)*1000; | ||
| 4212 | sector_div(res, (u32)((max_blocks>>scale)+1)); | ||
| 4213 | |||
| 4214 | per_milli = res; | ||
| 4061 | { | 4215 | { |
| 4062 | int i, x = res/50, y = 20-x; | 4216 | int i, x = per_milli/50, y = 20-x; |
| 4063 | seq_printf(seq, "["); | 4217 | seq_printf(seq, "["); |
| 4064 | for (i = 0; i < x; i++) | 4218 | for (i = 0; i < x; i++) |
| 4065 | seq_printf(seq, "="); | 4219 | seq_printf(seq, "="); |
| @@ -4068,10 +4222,14 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev) | |||
| 4068 | seq_printf(seq, "."); | 4222 | seq_printf(seq, "."); |
| 4069 | seq_printf(seq, "] "); | 4223 | seq_printf(seq, "] "); |
| 4070 | } | 4224 | } |
| 4071 | seq_printf(seq, " %s =%3lu.%lu%% (%lu/%lu)", | 4225 | seq_printf(seq, " %s =%3u.%u%% (%llu/%llu)", |
| 4226 | (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)? | ||
| 4227 | "reshape" : | ||
| 4072 | (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ? | 4228 | (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ? |
| 4073 | "resync" : "recovery"), | 4229 | "resync" : "recovery")), |
| 4074 | res/10, res % 10, resync, max_blocks); | 4230 | per_milli/10, per_milli % 10, |
| 4231 | (unsigned long long) resync, | ||
| 4232 | (unsigned long long) max_blocks); | ||
| 4075 | 4233 | ||
| 4076 | /* | 4234 | /* |
| 4077 | * We do not want to overflow, so the order of operands and | 4235 | * We do not want to overflow, so the order of operands and |
| @@ -4085,7 +4243,7 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev) | |||
| 4085 | dt = ((jiffies - mddev->resync_mark) / HZ); | 4243 | dt = ((jiffies - mddev->resync_mark) / HZ); |
| 4086 | if (!dt) dt++; | 4244 | if (!dt) dt++; |
| 4087 | db = resync - (mddev->resync_mark_cnt/2); | 4245 | db = resync - (mddev->resync_mark_cnt/2); |
| 4088 | rt = (dt * ((max_blocks-resync) / (db/100+1)))/100; | 4246 | rt = (dt * ((unsigned long)(max_blocks-resync) / (db/100+1)))/100; |
| 4089 | 4247 | ||
| 4090 | seq_printf(seq, " finish=%lu.%lumin", rt / 60, (rt % 60)/6); | 4248 | seq_printf(seq, " finish=%lu.%lumin", rt / 60, (rt % 60)/6); |
| 4091 | 4249 | ||
| @@ -4442,7 +4600,7 @@ static DECLARE_WAIT_QUEUE_HEAD(resync_wait); | |||
| 4442 | 4600 | ||
| 4443 | #define SYNC_MARKS 10 | 4601 | #define SYNC_MARKS 10 |
| 4444 | #define SYNC_MARK_STEP (3*HZ) | 4602 | #define SYNC_MARK_STEP (3*HZ) |
| 4445 | static void md_do_sync(mddev_t *mddev) | 4603 | void md_do_sync(mddev_t *mddev) |
| 4446 | { | 4604 | { |
| 4447 | mddev_t *mddev2; | 4605 | mddev_t *mddev2; |
| 4448 | unsigned int currspeed = 0, | 4606 | unsigned int currspeed = 0, |
| @@ -4522,7 +4680,9 @@ static void md_do_sync(mddev_t *mddev) | |||
| 4522 | */ | 4680 | */ |
| 4523 | max_sectors = mddev->resync_max_sectors; | 4681 | max_sectors = mddev->resync_max_sectors; |
| 4524 | mddev->resync_mismatches = 0; | 4682 | mddev->resync_mismatches = 0; |
| 4525 | } else | 4683 | } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) |
| 4684 | max_sectors = mddev->size << 1; | ||
| 4685 | else | ||
| 4526 | /* recovery follows the physical size of devices */ | 4686 | /* recovery follows the physical size of devices */ |
| 4527 | max_sectors = mddev->size << 1; | 4687 | max_sectors = mddev->size << 1; |
| 4528 | 4688 | ||
| @@ -4658,6 +4818,8 @@ static void md_do_sync(mddev_t *mddev) | |||
| 4658 | mddev->pers->sync_request(mddev, max_sectors, &skipped, 1); | 4818 | mddev->pers->sync_request(mddev, max_sectors, &skipped, 1); |
| 4659 | 4819 | ||
| 4660 | if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) && | 4820 | if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) && |
| 4821 | test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && | ||
| 4822 | !test_bit(MD_RECOVERY_CHECK, &mddev->recovery) && | ||
| 4661 | mddev->curr_resync > 2 && | 4823 | mddev->curr_resync > 2 && |
| 4662 | mddev->curr_resync >= mddev->recovery_cp) { | 4824 | mddev->curr_resync >= mddev->recovery_cp) { |
| 4663 | if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { | 4825 | if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { |
| @@ -4675,6 +4837,7 @@ static void md_do_sync(mddev_t *mddev) | |||
| 4675 | set_bit(MD_RECOVERY_DONE, &mddev->recovery); | 4837 | set_bit(MD_RECOVERY_DONE, &mddev->recovery); |
| 4676 | md_wakeup_thread(mddev->thread); | 4838 | md_wakeup_thread(mddev->thread); |
| 4677 | } | 4839 | } |
| 4840 | EXPORT_SYMBOL_GPL(md_do_sync); | ||
| 4678 | 4841 | ||
| 4679 | 4842 | ||
| 4680 | /* | 4843 | /* |
| @@ -4730,7 +4893,7 @@ void md_check_recovery(mddev_t *mddev) | |||
| 4730 | )) | 4893 | )) |
| 4731 | return; | 4894 | return; |
| 4732 | 4895 | ||
| 4733 | if (mddev_trylock(mddev)==0) { | 4896 | if (mddev_trylock(mddev)) { |
| 4734 | int spares =0; | 4897 | int spares =0; |
| 4735 | 4898 | ||
| 4736 | spin_lock_irq(&mddev->write_lock); | 4899 | spin_lock_irq(&mddev->write_lock); |
| @@ -4866,7 +5029,7 @@ static int md_notify_reboot(struct notifier_block *this, | |||
| 4866 | printk(KERN_INFO "md: stopping all md devices.\n"); | 5029 | printk(KERN_INFO "md: stopping all md devices.\n"); |
| 4867 | 5030 | ||
| 4868 | ITERATE_MDDEV(mddev,tmp) | 5031 | ITERATE_MDDEV(mddev,tmp) |
| 4869 | if (mddev_trylock(mddev)==0) | 5032 | if (mddev_trylock(mddev)) |
| 4870 | do_md_stop (mddev, 1); | 5033 | do_md_stop (mddev, 1); |
| 4871 | /* | 5034 | /* |
| 4872 | * certain more exotic SCSI devices are known to be | 5035 | * certain more exotic SCSI devices are known to be |
