diff options
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r-- | drivers/md/md.c | 227 |
1 files changed, 155 insertions, 72 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index 20ca80b7dc20..2897df90df44 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -34,6 +34,7 @@ | |||
34 | 34 | ||
35 | #include <linux/module.h> | 35 | #include <linux/module.h> |
36 | #include <linux/config.h> | 36 | #include <linux/config.h> |
37 | #include <linux/kthread.h> | ||
37 | #include <linux/linkage.h> | 38 | #include <linux/linkage.h> |
38 | #include <linux/raid/md.h> | 39 | #include <linux/raid/md.h> |
39 | #include <linux/raid/bitmap.h> | 40 | #include <linux/raid/bitmap.h> |
@@ -73,7 +74,7 @@ static DEFINE_SPINLOCK(pers_lock); | |||
73 | * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit' | 74 | * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit' |
74 | * is 1000 KB/sec, so the extra system load does not show up that much. | 75 | * is 1000 KB/sec, so the extra system load does not show up that much. |
75 | * Increase it if you want to have more _guaranteed_ speed. Note that | 76 | * Increase it if you want to have more _guaranteed_ speed. Note that |
76 | * the RAID driver will use the maximum available bandwith if the IO | 77 | * the RAID driver will use the maximum available bandwidth if the IO |
77 | * subsystem is idle. There is also an 'absolute maximum' reconstruction | 78 | * subsystem is idle. There is also an 'absolute maximum' reconstruction |
78 | * speed limit - in case reconstruction slows down your system despite | 79 | * speed limit - in case reconstruction slows down your system despite |
79 | * idle IO detection. | 80 | * idle IO detection. |
@@ -393,7 +394,7 @@ int sync_page_io(struct block_device *bdev, sector_t sector, int size, | |||
393 | return ret; | 394 | return ret; |
394 | } | 395 | } |
395 | 396 | ||
396 | static int read_disk_sb(mdk_rdev_t * rdev) | 397 | static int read_disk_sb(mdk_rdev_t * rdev, int size) |
397 | { | 398 | { |
398 | char b[BDEVNAME_SIZE]; | 399 | char b[BDEVNAME_SIZE]; |
399 | if (!rdev->sb_page) { | 400 | if (!rdev->sb_page) { |
@@ -404,7 +405,7 @@ static int read_disk_sb(mdk_rdev_t * rdev) | |||
404 | return 0; | 405 | return 0; |
405 | 406 | ||
406 | 407 | ||
407 | if (!sync_page_io(rdev->bdev, rdev->sb_offset<<1, MD_SB_BYTES, rdev->sb_page, READ)) | 408 | if (!sync_page_io(rdev->bdev, rdev->sb_offset<<1, size, rdev->sb_page, READ)) |
408 | goto fail; | 409 | goto fail; |
409 | rdev->sb_loaded = 1; | 410 | rdev->sb_loaded = 1; |
410 | return 0; | 411 | return 0; |
@@ -531,7 +532,7 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version | |||
531 | sb_offset = calc_dev_sboffset(rdev->bdev); | 532 | sb_offset = calc_dev_sboffset(rdev->bdev); |
532 | rdev->sb_offset = sb_offset; | 533 | rdev->sb_offset = sb_offset; |
533 | 534 | ||
534 | ret = read_disk_sb(rdev); | 535 | ret = read_disk_sb(rdev, MD_SB_BYTES); |
535 | if (ret) return ret; | 536 | if (ret) return ret; |
536 | 537 | ||
537 | ret = -EINVAL; | 538 | ret = -EINVAL; |
@@ -564,6 +565,7 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version | |||
564 | 565 | ||
565 | rdev->preferred_minor = sb->md_minor; | 566 | rdev->preferred_minor = sb->md_minor; |
566 | rdev->data_offset = 0; | 567 | rdev->data_offset = 0; |
568 | rdev->sb_size = MD_SB_BYTES; | ||
567 | 569 | ||
568 | if (sb->level == LEVEL_MULTIPATH) | 570 | if (sb->level == LEVEL_MULTIPATH) |
569 | rdev->desc_nr = -1; | 571 | rdev->desc_nr = -1; |
@@ -623,6 +625,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
623 | mddev->size = sb->size; | 625 | mddev->size = sb->size; |
624 | mddev->events = md_event(sb); | 626 | mddev->events = md_event(sb); |
625 | mddev->bitmap_offset = 0; | 627 | mddev->bitmap_offset = 0; |
628 | mddev->default_bitmap_offset = MD_SB_BYTES >> 9; | ||
626 | 629 | ||
627 | if (sb->state & (1<<MD_SB_CLEAN)) | 630 | if (sb->state & (1<<MD_SB_CLEAN)) |
628 | mddev->recovery_cp = MaxSector; | 631 | mddev->recovery_cp = MaxSector; |
@@ -643,12 +646,12 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
643 | 646 | ||
644 | if (sb->state & (1<<MD_SB_BITMAP_PRESENT) && | 647 | if (sb->state & (1<<MD_SB_BITMAP_PRESENT) && |
645 | mddev->bitmap_file == NULL) { | 648 | mddev->bitmap_file == NULL) { |
646 | if (mddev->level != 1) { | 649 | if (mddev->level != 1 && mddev->level != 5 && mddev->level != 6) { |
647 | /* FIXME use a better test */ | 650 | /* FIXME use a better test */ |
648 | printk(KERN_WARNING "md: bitmaps only support for raid1\n"); | 651 | printk(KERN_WARNING "md: bitmaps only support for raid1\n"); |
649 | return -EINVAL; | 652 | return -EINVAL; |
650 | } | 653 | } |
651 | mddev->bitmap_offset = (MD_SB_BYTES >> 9); | 654 | mddev->bitmap_offset = mddev->default_bitmap_offset; |
652 | } | 655 | } |
653 | 656 | ||
654 | } else if (mddev->pers == NULL) { | 657 | } else if (mddev->pers == NULL) { |
@@ -669,6 +672,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
669 | 672 | ||
670 | if (mddev->level != LEVEL_MULTIPATH) { | 673 | if (mddev->level != LEVEL_MULTIPATH) { |
671 | rdev->faulty = 0; | 674 | rdev->faulty = 0; |
675 | rdev->flags = 0; | ||
672 | desc = sb->disks + rdev->desc_nr; | 676 | desc = sb->disks + rdev->desc_nr; |
673 | 677 | ||
674 | if (desc->state & (1<<MD_DISK_FAULTY)) | 678 | if (desc->state & (1<<MD_DISK_FAULTY)) |
@@ -678,6 +682,8 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
678 | rdev->in_sync = 1; | 682 | rdev->in_sync = 1; |
679 | rdev->raid_disk = desc->raid_disk; | 683 | rdev->raid_disk = desc->raid_disk; |
680 | } | 684 | } |
685 | if (desc->state & (1<<MD_DISK_WRITEMOSTLY)) | ||
686 | set_bit(WriteMostly, &rdev->flags); | ||
681 | } else /* MULTIPATH are always insync */ | 687 | } else /* MULTIPATH are always insync */ |
682 | rdev->in_sync = 1; | 688 | rdev->in_sync = 1; |
683 | return 0; | 689 | return 0; |
@@ -706,6 +712,8 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
706 | int i; | 712 | int i; |
707 | int active=0, working=0,failed=0,spare=0,nr_disks=0; | 713 | int active=0, working=0,failed=0,spare=0,nr_disks=0; |
708 | 714 | ||
715 | rdev->sb_size = MD_SB_BYTES; | ||
716 | |||
709 | sb = (mdp_super_t*)page_address(rdev->sb_page); | 717 | sb = (mdp_super_t*)page_address(rdev->sb_page); |
710 | 718 | ||
711 | memset(sb, 0, sizeof(*sb)); | 719 | memset(sb, 0, sizeof(*sb)); |
@@ -776,6 +784,8 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
776 | spare++; | 784 | spare++; |
777 | working++; | 785 | working++; |
778 | } | 786 | } |
787 | if (test_bit(WriteMostly, &rdev2->flags)) | ||
788 | d->state |= (1<<MD_DISK_WRITEMOSTLY); | ||
779 | } | 789 | } |
780 | 790 | ||
781 | /* now set the "removed" and "faulty" bits on any missing devices */ | 791 | /* now set the "removed" and "faulty" bits on any missing devices */ |
@@ -831,6 +841,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) | |||
831 | int ret; | 841 | int ret; |
832 | sector_t sb_offset; | 842 | sector_t sb_offset; |
833 | char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE]; | 843 | char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE]; |
844 | int bmask; | ||
834 | 845 | ||
835 | /* | 846 | /* |
836 | * Calculate the position of the superblock. | 847 | * Calculate the position of the superblock. |
@@ -859,7 +870,10 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) | |||
859 | } | 870 | } |
860 | rdev->sb_offset = sb_offset; | 871 | rdev->sb_offset = sb_offset; |
861 | 872 | ||
862 | ret = read_disk_sb(rdev); | 873 | /* superblock is rarely larger than 1K, but it can be larger, |
874 | * and it is safe to read 4k, so we do that | ||
875 | */ | ||
876 | ret = read_disk_sb(rdev, 4096); | ||
863 | if (ret) return ret; | 877 | if (ret) return ret; |
864 | 878 | ||
865 | 879 | ||
@@ -869,7 +883,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) | |||
869 | sb->major_version != cpu_to_le32(1) || | 883 | sb->major_version != cpu_to_le32(1) || |
870 | le32_to_cpu(sb->max_dev) > (4096-256)/2 || | 884 | le32_to_cpu(sb->max_dev) > (4096-256)/2 || |
871 | le64_to_cpu(sb->super_offset) != (rdev->sb_offset<<1) || | 885 | le64_to_cpu(sb->super_offset) != (rdev->sb_offset<<1) || |
872 | sb->feature_map != 0) | 886 | (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0) |
873 | return -EINVAL; | 887 | return -EINVAL; |
874 | 888 | ||
875 | if (calc_sb_1_csum(sb) != sb->sb_csum) { | 889 | if (calc_sb_1_csum(sb) != sb->sb_csum) { |
@@ -885,6 +899,11 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) | |||
885 | rdev->preferred_minor = 0xffff; | 899 | rdev->preferred_minor = 0xffff; |
886 | rdev->data_offset = le64_to_cpu(sb->data_offset); | 900 | rdev->data_offset = le64_to_cpu(sb->data_offset); |
887 | 901 | ||
902 | rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256; | ||
903 | bmask = queue_hardsect_size(rdev->bdev->bd_disk->queue)-1; | ||
904 | if (rdev->sb_size & bmask) | ||
905 | rdev-> sb_size = (rdev->sb_size | bmask)+1; | ||
906 | |||
888 | if (refdev == 0) | 907 | if (refdev == 0) |
889 | return 1; | 908 | return 1; |
890 | else { | 909 | else { |
@@ -939,13 +958,15 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
939 | mddev->size = le64_to_cpu(sb->size)/2; | 958 | mddev->size = le64_to_cpu(sb->size)/2; |
940 | mddev->events = le64_to_cpu(sb->events); | 959 | mddev->events = le64_to_cpu(sb->events); |
941 | mddev->bitmap_offset = 0; | 960 | mddev->bitmap_offset = 0; |
961 | mddev->default_bitmap_offset = 0; | ||
962 | mddev->default_bitmap_offset = 1024; | ||
942 | 963 | ||
943 | mddev->recovery_cp = le64_to_cpu(sb->resync_offset); | 964 | mddev->recovery_cp = le64_to_cpu(sb->resync_offset); |
944 | memcpy(mddev->uuid, sb->set_uuid, 16); | 965 | memcpy(mddev->uuid, sb->set_uuid, 16); |
945 | 966 | ||
946 | mddev->max_disks = (4096-256)/2; | 967 | mddev->max_disks = (4096-256)/2; |
947 | 968 | ||
948 | if ((le32_to_cpu(sb->feature_map) & 1) && | 969 | if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) && |
949 | mddev->bitmap_file == NULL ) { | 970 | mddev->bitmap_file == NULL ) { |
950 | if (mddev->level != 1) { | 971 | if (mddev->level != 1) { |
951 | printk(KERN_WARNING "md: bitmaps only supported for raid1\n"); | 972 | printk(KERN_WARNING "md: bitmaps only supported for raid1\n"); |
@@ -986,6 +1007,9 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
986 | rdev->raid_disk = role; | 1007 | rdev->raid_disk = role; |
987 | break; | 1008 | break; |
988 | } | 1009 | } |
1010 | rdev->flags = 0; | ||
1011 | if (sb->devflags & WriteMostly1) | ||
1012 | set_bit(WriteMostly, &rdev->flags); | ||
989 | } else /* MULTIPATH are always insync */ | 1013 | } else /* MULTIPATH are always insync */ |
990 | rdev->in_sync = 1; | 1014 | rdev->in_sync = 1; |
991 | 1015 | ||
@@ -1017,7 +1041,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1017 | 1041 | ||
1018 | if (mddev->bitmap && mddev->bitmap_file == NULL) { | 1042 | if (mddev->bitmap && mddev->bitmap_file == NULL) { |
1019 | sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset); | 1043 | sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset); |
1020 | sb->feature_map = cpu_to_le32(1); | 1044 | sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET); |
1021 | } | 1045 | } |
1022 | 1046 | ||
1023 | max_dev = 0; | 1047 | max_dev = 0; |
@@ -1363,7 +1387,7 @@ repeat: | |||
1363 | dprintk("%s ", bdevname(rdev->bdev,b)); | 1387 | dprintk("%s ", bdevname(rdev->bdev,b)); |
1364 | if (!rdev->faulty) { | 1388 | if (!rdev->faulty) { |
1365 | md_super_write(mddev,rdev, | 1389 | md_super_write(mddev,rdev, |
1366 | rdev->sb_offset<<1, MD_SB_BYTES, | 1390 | rdev->sb_offset<<1, rdev->sb_size, |
1367 | rdev->sb_page); | 1391 | rdev->sb_page); |
1368 | dprintk(KERN_INFO "(write) %s's sb offset: %llu\n", | 1392 | dprintk(KERN_INFO "(write) %s's sb offset: %llu\n", |
1369 | bdevname(rdev->bdev,b), | 1393 | bdevname(rdev->bdev,b), |
@@ -2073,6 +2097,8 @@ static int get_array_info(mddev_t * mddev, void __user * arg) | |||
2073 | info.state = 0; | 2097 | info.state = 0; |
2074 | if (mddev->in_sync) | 2098 | if (mddev->in_sync) |
2075 | info.state = (1<<MD_SB_CLEAN); | 2099 | info.state = (1<<MD_SB_CLEAN); |
2100 | if (mddev->bitmap && mddev->bitmap_offset) | ||
2101 | info.state = (1<<MD_SB_BITMAP_PRESENT); | ||
2076 | info.active_disks = active; | 2102 | info.active_disks = active; |
2077 | info.working_disks = working; | 2103 | info.working_disks = working; |
2078 | info.failed_disks = failed; | 2104 | info.failed_disks = failed; |
@@ -2087,7 +2113,7 @@ static int get_array_info(mddev_t * mddev, void __user * arg) | |||
2087 | return 0; | 2113 | return 0; |
2088 | } | 2114 | } |
2089 | 2115 | ||
2090 | static int get_bitmap_file(mddev_t * mddev, void * arg) | 2116 | static int get_bitmap_file(mddev_t * mddev, void __user * arg) |
2091 | { | 2117 | { |
2092 | mdu_bitmap_file_t *file = NULL; /* too big for stack allocation */ | 2118 | mdu_bitmap_file_t *file = NULL; /* too big for stack allocation */ |
2093 | char *ptr, *buf = NULL; | 2119 | char *ptr, *buf = NULL; |
@@ -2146,6 +2172,8 @@ static int get_disk_info(mddev_t * mddev, void __user * arg) | |||
2146 | info.state |= (1<<MD_DISK_ACTIVE); | 2172 | info.state |= (1<<MD_DISK_ACTIVE); |
2147 | info.state |= (1<<MD_DISK_SYNC); | 2173 | info.state |= (1<<MD_DISK_SYNC); |
2148 | } | 2174 | } |
2175 | if (test_bit(WriteMostly, &rdev->flags)) | ||
2176 | info.state |= (1<<MD_DISK_WRITEMOSTLY); | ||
2149 | } else { | 2177 | } else { |
2150 | info.major = info.minor = 0; | 2178 | info.major = info.minor = 0; |
2151 | info.raid_disk = -1; | 2179 | info.raid_disk = -1; |
@@ -2210,8 +2238,11 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) | |||
2210 | mdname(mddev)); | 2238 | mdname(mddev)); |
2211 | return -EINVAL; | 2239 | return -EINVAL; |
2212 | } | 2240 | } |
2213 | rdev = md_import_device(dev, mddev->major_version, | 2241 | if (mddev->persistent) |
2214 | mddev->minor_version); | 2242 | rdev = md_import_device(dev, mddev->major_version, |
2243 | mddev->minor_version); | ||
2244 | else | ||
2245 | rdev = md_import_device(dev, -1, -1); | ||
2215 | if (IS_ERR(rdev)) { | 2246 | if (IS_ERR(rdev)) { |
2216 | printk(KERN_WARNING | 2247 | printk(KERN_WARNING |
2217 | "md: md_import_device returned %ld\n", | 2248 | "md: md_import_device returned %ld\n", |
@@ -2231,6 +2262,9 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) | |||
2231 | rdev->saved_raid_disk = rdev->raid_disk; | 2262 | rdev->saved_raid_disk = rdev->raid_disk; |
2232 | 2263 | ||
2233 | rdev->in_sync = 0; /* just to be sure */ | 2264 | rdev->in_sync = 0; /* just to be sure */ |
2265 | if (info->state & (1<<MD_DISK_WRITEMOSTLY)) | ||
2266 | set_bit(WriteMostly, &rdev->flags); | ||
2267 | |||
2234 | rdev->raid_disk = -1; | 2268 | rdev->raid_disk = -1; |
2235 | err = bind_rdev_to_array(rdev, mddev); | 2269 | err = bind_rdev_to_array(rdev, mddev); |
2236 | if (err) | 2270 | if (err) |
@@ -2271,6 +2305,9 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) | |||
2271 | else | 2305 | else |
2272 | rdev->in_sync = 0; | 2306 | rdev->in_sync = 0; |
2273 | 2307 | ||
2308 | if (info->state & (1<<MD_DISK_WRITEMOSTLY)) | ||
2309 | set_bit(WriteMostly, &rdev->flags); | ||
2310 | |||
2274 | err = bind_rdev_to_array(rdev, mddev); | 2311 | err = bind_rdev_to_array(rdev, mddev); |
2275 | if (err) { | 2312 | if (err) { |
2276 | export_rdev(rdev); | 2313 | export_rdev(rdev); |
@@ -2430,25 +2467,51 @@ static int set_bitmap_file(mddev_t *mddev, int fd) | |||
2430 | { | 2467 | { |
2431 | int err; | 2468 | int err; |
2432 | 2469 | ||
2433 | if (mddev->pers) | 2470 | if (mddev->pers) { |
2434 | return -EBUSY; | 2471 | if (!mddev->pers->quiesce) |
2472 | return -EBUSY; | ||
2473 | if (mddev->recovery || mddev->sync_thread) | ||
2474 | return -EBUSY; | ||
2475 | /* we should be able to change the bitmap.. */ | ||
2476 | } | ||
2435 | 2477 | ||
2436 | mddev->bitmap_file = fget(fd); | ||
2437 | 2478 | ||
2438 | if (mddev->bitmap_file == NULL) { | 2479 | if (fd >= 0) { |
2439 | printk(KERN_ERR "%s: error: failed to get bitmap file\n", | 2480 | if (mddev->bitmap) |
2440 | mdname(mddev)); | 2481 | return -EEXIST; /* cannot add when bitmap is present */ |
2441 | return -EBADF; | 2482 | mddev->bitmap_file = fget(fd); |
2442 | } | ||
2443 | 2483 | ||
2444 | err = deny_bitmap_write_access(mddev->bitmap_file); | 2484 | if (mddev->bitmap_file == NULL) { |
2445 | if (err) { | 2485 | printk(KERN_ERR "%s: error: failed to get bitmap file\n", |
2446 | printk(KERN_ERR "%s: error: bitmap file is already in use\n", | 2486 | mdname(mddev)); |
2447 | mdname(mddev)); | 2487 | return -EBADF; |
2448 | fput(mddev->bitmap_file); | 2488 | } |
2449 | mddev->bitmap_file = NULL; | 2489 | |
2450 | } else | 2490 | err = deny_bitmap_write_access(mddev->bitmap_file); |
2491 | if (err) { | ||
2492 | printk(KERN_ERR "%s: error: bitmap file is already in use\n", | ||
2493 | mdname(mddev)); | ||
2494 | fput(mddev->bitmap_file); | ||
2495 | mddev->bitmap_file = NULL; | ||
2496 | return err; | ||
2497 | } | ||
2451 | mddev->bitmap_offset = 0; /* file overrides offset */ | 2498 | mddev->bitmap_offset = 0; /* file overrides offset */ |
2499 | } else if (mddev->bitmap == NULL) | ||
2500 | return -ENOENT; /* cannot remove what isn't there */ | ||
2501 | err = 0; | ||
2502 | if (mddev->pers) { | ||
2503 | mddev->pers->quiesce(mddev, 1); | ||
2504 | if (fd >= 0) | ||
2505 | err = bitmap_create(mddev); | ||
2506 | if (fd < 0 || err) | ||
2507 | bitmap_destroy(mddev); | ||
2508 | mddev->pers->quiesce(mddev, 0); | ||
2509 | } else if (fd < 0) { | ||
2510 | if (mddev->bitmap_file) | ||
2511 | fput(mddev->bitmap_file); | ||
2512 | mddev->bitmap_file = NULL; | ||
2513 | } | ||
2514 | |||
2452 | return err; | 2515 | return err; |
2453 | } | 2516 | } |
2454 | 2517 | ||
@@ -2528,6 +2591,11 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info) | |||
2528 | { | 2591 | { |
2529 | int rv = 0; | 2592 | int rv = 0; |
2530 | int cnt = 0; | 2593 | int cnt = 0; |
2594 | int state = 0; | ||
2595 | |||
2596 | /* calculate expected state, ignoring low bits */ | ||
2597 | if (mddev->bitmap && mddev->bitmap_offset) | ||
2598 | state |= (1 << MD_SB_BITMAP_PRESENT); | ||
2531 | 2599 | ||
2532 | if (mddev->major_version != info->major_version || | 2600 | if (mddev->major_version != info->major_version || |
2533 | mddev->minor_version != info->minor_version || | 2601 | mddev->minor_version != info->minor_version || |
@@ -2536,12 +2604,16 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info) | |||
2536 | mddev->level != info->level || | 2604 | mddev->level != info->level || |
2537 | /* mddev->layout != info->layout || */ | 2605 | /* mddev->layout != info->layout || */ |
2538 | !mddev->persistent != info->not_persistent|| | 2606 | !mddev->persistent != info->not_persistent|| |
2539 | mddev->chunk_size != info->chunk_size ) | 2607 | mddev->chunk_size != info->chunk_size || |
2608 | /* ignore bottom 8 bits of state, and allow SB_BITMAP_PRESENT to change */ | ||
2609 | ((state^info->state) & 0xfffffe00) | ||
2610 | ) | ||
2540 | return -EINVAL; | 2611 | return -EINVAL; |
2541 | /* Check there is only one change */ | 2612 | /* Check there is only one change */ |
2542 | if (mddev->size != info->size) cnt++; | 2613 | if (mddev->size != info->size) cnt++; |
2543 | if (mddev->raid_disks != info->raid_disks) cnt++; | 2614 | if (mddev->raid_disks != info->raid_disks) cnt++; |
2544 | if (mddev->layout != info->layout) cnt++; | 2615 | if (mddev->layout != info->layout) cnt++; |
2616 | if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) cnt++; | ||
2545 | if (cnt == 0) return 0; | 2617 | if (cnt == 0) return 0; |
2546 | if (cnt > 1) return -EINVAL; | 2618 | if (cnt > 1) return -EINVAL; |
2547 | 2619 | ||
@@ -2620,6 +2692,35 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info) | |||
2620 | } | 2692 | } |
2621 | } | 2693 | } |
2622 | } | 2694 | } |
2695 | if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) { | ||
2696 | if (mddev->pers->quiesce == NULL) | ||
2697 | return -EINVAL; | ||
2698 | if (mddev->recovery || mddev->sync_thread) | ||
2699 | return -EBUSY; | ||
2700 | if (info->state & (1<<MD_SB_BITMAP_PRESENT)) { | ||
2701 | /* add the bitmap */ | ||
2702 | if (mddev->bitmap) | ||
2703 | return -EEXIST; | ||
2704 | if (mddev->default_bitmap_offset == 0) | ||
2705 | return -EINVAL; | ||
2706 | mddev->bitmap_offset = mddev->default_bitmap_offset; | ||
2707 | mddev->pers->quiesce(mddev, 1); | ||
2708 | rv = bitmap_create(mddev); | ||
2709 | if (rv) | ||
2710 | bitmap_destroy(mddev); | ||
2711 | mddev->pers->quiesce(mddev, 0); | ||
2712 | } else { | ||
2713 | /* remove the bitmap */ | ||
2714 | if (!mddev->bitmap) | ||
2715 | return -ENOENT; | ||
2716 | if (mddev->bitmap->file) | ||
2717 | return -EINVAL; | ||
2718 | mddev->pers->quiesce(mddev, 1); | ||
2719 | bitmap_destroy(mddev); | ||
2720 | mddev->pers->quiesce(mddev, 0); | ||
2721 | mddev->bitmap_offset = 0; | ||
2722 | } | ||
2723 | } | ||
2623 | md_update_sb(mddev); | 2724 | md_update_sb(mddev); |
2624 | return rv; | 2725 | return rv; |
2625 | } | 2726 | } |
@@ -2781,7 +2882,7 @@ static int md_ioctl(struct inode *inode, struct file *file, | |||
2781 | goto done_unlock; | 2882 | goto done_unlock; |
2782 | 2883 | ||
2783 | case GET_BITMAP_FILE: | 2884 | case GET_BITMAP_FILE: |
2784 | err = get_bitmap_file(mddev, (void *)arg); | 2885 | err = get_bitmap_file(mddev, argp); |
2785 | goto done_unlock; | 2886 | goto done_unlock; |
2786 | 2887 | ||
2787 | case GET_DISK_INFO: | 2888 | case GET_DISK_INFO: |
@@ -2950,18 +3051,6 @@ static int md_thread(void * arg) | |||
2950 | { | 3051 | { |
2951 | mdk_thread_t *thread = arg; | 3052 | mdk_thread_t *thread = arg; |
2952 | 3053 | ||
2953 | lock_kernel(); | ||
2954 | |||
2955 | /* | ||
2956 | * Detach thread | ||
2957 | */ | ||
2958 | |||
2959 | daemonize(thread->name, mdname(thread->mddev)); | ||
2960 | |||
2961 | current->exit_signal = SIGCHLD; | ||
2962 | allow_signal(SIGKILL); | ||
2963 | thread->tsk = current; | ||
2964 | |||
2965 | /* | 3054 | /* |
2966 | * md_thread is a 'system-thread', its priority should be very | 3055 | * md_thread is a 'system-thread', its priority should be very |
2967 | * high. We avoid resource deadlocks individually in each | 3056 | * high. We avoid resource deadlocks individually in each |
@@ -2973,14 +3062,14 @@ static int md_thread(void * arg) | |||
2973 | * bdflush, otherwise bdflush will deadlock if there are too | 3062 | * bdflush, otherwise bdflush will deadlock if there are too |
2974 | * many dirty RAID5 blocks. | 3063 | * many dirty RAID5 blocks. |
2975 | */ | 3064 | */ |
2976 | unlock_kernel(); | ||
2977 | 3065 | ||
2978 | complete(thread->event); | 3066 | complete(thread->event); |
2979 | while (thread->run) { | 3067 | while (!kthread_should_stop()) { |
2980 | void (*run)(mddev_t *); | 3068 | void (*run)(mddev_t *); |
2981 | 3069 | ||
2982 | wait_event_interruptible_timeout(thread->wqueue, | 3070 | wait_event_interruptible_timeout(thread->wqueue, |
2983 | test_bit(THREAD_WAKEUP, &thread->flags), | 3071 | test_bit(THREAD_WAKEUP, &thread->flags) |
3072 | || kthread_should_stop(), | ||
2984 | thread->timeout); | 3073 | thread->timeout); |
2985 | try_to_freeze(); | 3074 | try_to_freeze(); |
2986 | 3075 | ||
@@ -2989,11 +3078,8 @@ static int md_thread(void * arg) | |||
2989 | run = thread->run; | 3078 | run = thread->run; |
2990 | if (run) | 3079 | if (run) |
2991 | run(thread->mddev); | 3080 | run(thread->mddev); |
2992 | |||
2993 | if (signal_pending(current)) | ||
2994 | flush_signals(current); | ||
2995 | } | 3081 | } |
2996 | complete(thread->event); | 3082 | |
2997 | return 0; | 3083 | return 0; |
2998 | } | 3084 | } |
2999 | 3085 | ||
@@ -3010,11 +3096,9 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev, | |||
3010 | const char *name) | 3096 | const char *name) |
3011 | { | 3097 | { |
3012 | mdk_thread_t *thread; | 3098 | mdk_thread_t *thread; |
3013 | int ret; | ||
3014 | struct completion event; | 3099 | struct completion event; |
3015 | 3100 | ||
3016 | thread = (mdk_thread_t *) kmalloc | 3101 | thread = kmalloc(sizeof(mdk_thread_t), GFP_KERNEL); |
3017 | (sizeof(mdk_thread_t), GFP_KERNEL); | ||
3018 | if (!thread) | 3102 | if (!thread) |
3019 | return NULL; | 3103 | return NULL; |
3020 | 3104 | ||
@@ -3027,8 +3111,8 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev, | |||
3027 | thread->mddev = mddev; | 3111 | thread->mddev = mddev; |
3028 | thread->name = name; | 3112 | thread->name = name; |
3029 | thread->timeout = MAX_SCHEDULE_TIMEOUT; | 3113 | thread->timeout = MAX_SCHEDULE_TIMEOUT; |
3030 | ret = kernel_thread(md_thread, thread, 0); | 3114 | thread->tsk = kthread_run(md_thread, thread, mdname(thread->mddev)); |
3031 | if (ret < 0) { | 3115 | if (IS_ERR(thread->tsk)) { |
3032 | kfree(thread); | 3116 | kfree(thread); |
3033 | return NULL; | 3117 | return NULL; |
3034 | } | 3118 | } |
@@ -3038,21 +3122,9 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev, | |||
3038 | 3122 | ||
3039 | void md_unregister_thread(mdk_thread_t *thread) | 3123 | void md_unregister_thread(mdk_thread_t *thread) |
3040 | { | 3124 | { |
3041 | struct completion event; | ||
3042 | |||
3043 | init_completion(&event); | ||
3044 | |||
3045 | thread->event = &event; | ||
3046 | |||
3047 | /* As soon as ->run is set to NULL, the task could disappear, | ||
3048 | * so we need to hold tasklist_lock until we have sent the signal | ||
3049 | */ | ||
3050 | dprintk("interrupting MD-thread pid %d\n", thread->tsk->pid); | 3125 | dprintk("interrupting MD-thread pid %d\n", thread->tsk->pid); |
3051 | read_lock(&tasklist_lock); | 3126 | |
3052 | thread->run = NULL; | 3127 | kthread_stop(thread->tsk); |
3053 | send_sig(SIGKILL, thread->tsk, 1); | ||
3054 | read_unlock(&tasklist_lock); | ||
3055 | wait_for_completion(&event); | ||
3056 | kfree(thread); | 3128 | kfree(thread); |
3057 | } | 3129 | } |
3058 | 3130 | ||
@@ -3259,10 +3331,13 @@ static int md_seq_show(struct seq_file *seq, void *v) | |||
3259 | char b[BDEVNAME_SIZE]; | 3331 | char b[BDEVNAME_SIZE]; |
3260 | seq_printf(seq, " %s[%d]", | 3332 | seq_printf(seq, " %s[%d]", |
3261 | bdevname(rdev->bdev,b), rdev->desc_nr); | 3333 | bdevname(rdev->bdev,b), rdev->desc_nr); |
3334 | if (test_bit(WriteMostly, &rdev->flags)) | ||
3335 | seq_printf(seq, "(W)"); | ||
3262 | if (rdev->faulty) { | 3336 | if (rdev->faulty) { |
3263 | seq_printf(seq, "(F)"); | 3337 | seq_printf(seq, "(F)"); |
3264 | continue; | 3338 | continue; |
3265 | } | 3339 | } else if (rdev->raid_disk < 0) |
3340 | seq_printf(seq, "(S)"); /* spare */ | ||
3266 | size += rdev->size; | 3341 | size += rdev->size; |
3267 | } | 3342 | } |
3268 | 3343 | ||
@@ -3274,6 +3349,15 @@ static int md_seq_show(struct seq_file *seq, void *v) | |||
3274 | seq_printf(seq, "\n %llu blocks", | 3349 | seq_printf(seq, "\n %llu blocks", |
3275 | (unsigned long long)size); | 3350 | (unsigned long long)size); |
3276 | } | 3351 | } |
3352 | if (mddev->persistent) { | ||
3353 | if (mddev->major_version != 0 || | ||
3354 | mddev->minor_version != 90) { | ||
3355 | seq_printf(seq," super %d.%d", | ||
3356 | mddev->major_version, | ||
3357 | mddev->minor_version); | ||
3358 | } | ||
3359 | } else | ||
3360 | seq_printf(seq, " super non-persistent"); | ||
3277 | 3361 | ||
3278 | if (mddev->pers) { | 3362 | if (mddev->pers) { |
3279 | mddev->pers->status (seq, mddev); | 3363 | mddev->pers->status (seq, mddev); |
@@ -3416,7 +3500,6 @@ void md_done_sync(mddev_t *mddev, int blocks, int ok) | |||
3416 | */ | 3500 | */ |
3417 | void md_write_start(mddev_t *mddev, struct bio *bi) | 3501 | void md_write_start(mddev_t *mddev, struct bio *bi) |
3418 | { | 3502 | { |
3419 | DEFINE_WAIT(w); | ||
3420 | if (bio_data_dir(bi) != WRITE) | 3503 | if (bio_data_dir(bi) != WRITE) |
3421 | return; | 3504 | return; |
3422 | 3505 | ||
@@ -3533,7 +3616,7 @@ static void md_do_sync(mddev_t *mddev) | |||
3533 | printk(KERN_INFO "md: syncing RAID array %s\n", mdname(mddev)); | 3616 | printk(KERN_INFO "md: syncing RAID array %s\n", mdname(mddev)); |
3534 | printk(KERN_INFO "md: minimum _guaranteed_ reconstruction speed:" | 3617 | printk(KERN_INFO "md: minimum _guaranteed_ reconstruction speed:" |
3535 | " %d KB/sec/disc.\n", sysctl_speed_limit_min); | 3618 | " %d KB/sec/disc.\n", sysctl_speed_limit_min); |
3536 | printk(KERN_INFO "md: using maximum available idle IO bandwith " | 3619 | printk(KERN_INFO "md: using maximum available idle IO bandwidth " |
3537 | "(but not more than %d KB/sec) for reconstruction.\n", | 3620 | "(but not more than %d KB/sec) for reconstruction.\n", |
3538 | sysctl_speed_limit_max); | 3621 | sysctl_speed_limit_max); |
3539 | 3622 | ||