diff options
Diffstat (limited to 'drivers/md/md.c')
| -rw-r--r-- | drivers/md/md.c | 227 |
1 files changed, 155 insertions, 72 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index 20ca80b7dc20..2897df90df44 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
| @@ -34,6 +34,7 @@ | |||
| 34 | 34 | ||
| 35 | #include <linux/module.h> | 35 | #include <linux/module.h> |
| 36 | #include <linux/config.h> | 36 | #include <linux/config.h> |
| 37 | #include <linux/kthread.h> | ||
| 37 | #include <linux/linkage.h> | 38 | #include <linux/linkage.h> |
| 38 | #include <linux/raid/md.h> | 39 | #include <linux/raid/md.h> |
| 39 | #include <linux/raid/bitmap.h> | 40 | #include <linux/raid/bitmap.h> |
| @@ -73,7 +74,7 @@ static DEFINE_SPINLOCK(pers_lock); | |||
| 73 | * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit' | 74 | * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit' |
| 74 | * is 1000 KB/sec, so the extra system load does not show up that much. | 75 | * is 1000 KB/sec, so the extra system load does not show up that much. |
| 75 | * Increase it if you want to have more _guaranteed_ speed. Note that | 76 | * Increase it if you want to have more _guaranteed_ speed. Note that |
| 76 | * the RAID driver will use the maximum available bandwith if the IO | 77 | * the RAID driver will use the maximum available bandwidth if the IO |
| 77 | * subsystem is idle. There is also an 'absolute maximum' reconstruction | 78 | * subsystem is idle. There is also an 'absolute maximum' reconstruction |
| 78 | * speed limit - in case reconstruction slows down your system despite | 79 | * speed limit - in case reconstruction slows down your system despite |
| 79 | * idle IO detection. | 80 | * idle IO detection. |
| @@ -393,7 +394,7 @@ int sync_page_io(struct block_device *bdev, sector_t sector, int size, | |||
| 393 | return ret; | 394 | return ret; |
| 394 | } | 395 | } |
| 395 | 396 | ||
| 396 | static int read_disk_sb(mdk_rdev_t * rdev) | 397 | static int read_disk_sb(mdk_rdev_t * rdev, int size) |
| 397 | { | 398 | { |
| 398 | char b[BDEVNAME_SIZE]; | 399 | char b[BDEVNAME_SIZE]; |
| 399 | if (!rdev->sb_page) { | 400 | if (!rdev->sb_page) { |
| @@ -404,7 +405,7 @@ static int read_disk_sb(mdk_rdev_t * rdev) | |||
| 404 | return 0; | 405 | return 0; |
| 405 | 406 | ||
| 406 | 407 | ||
| 407 | if (!sync_page_io(rdev->bdev, rdev->sb_offset<<1, MD_SB_BYTES, rdev->sb_page, READ)) | 408 | if (!sync_page_io(rdev->bdev, rdev->sb_offset<<1, size, rdev->sb_page, READ)) |
| 408 | goto fail; | 409 | goto fail; |
| 409 | rdev->sb_loaded = 1; | 410 | rdev->sb_loaded = 1; |
| 410 | return 0; | 411 | return 0; |
| @@ -531,7 +532,7 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version | |||
| 531 | sb_offset = calc_dev_sboffset(rdev->bdev); | 532 | sb_offset = calc_dev_sboffset(rdev->bdev); |
| 532 | rdev->sb_offset = sb_offset; | 533 | rdev->sb_offset = sb_offset; |
| 533 | 534 | ||
| 534 | ret = read_disk_sb(rdev); | 535 | ret = read_disk_sb(rdev, MD_SB_BYTES); |
| 535 | if (ret) return ret; | 536 | if (ret) return ret; |
| 536 | 537 | ||
| 537 | ret = -EINVAL; | 538 | ret = -EINVAL; |
| @@ -564,6 +565,7 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version | |||
| 564 | 565 | ||
| 565 | rdev->preferred_minor = sb->md_minor; | 566 | rdev->preferred_minor = sb->md_minor; |
| 566 | rdev->data_offset = 0; | 567 | rdev->data_offset = 0; |
| 568 | rdev->sb_size = MD_SB_BYTES; | ||
| 567 | 569 | ||
| 568 | if (sb->level == LEVEL_MULTIPATH) | 570 | if (sb->level == LEVEL_MULTIPATH) |
| 569 | rdev->desc_nr = -1; | 571 | rdev->desc_nr = -1; |
| @@ -623,6 +625,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
| 623 | mddev->size = sb->size; | 625 | mddev->size = sb->size; |
| 624 | mddev->events = md_event(sb); | 626 | mddev->events = md_event(sb); |
| 625 | mddev->bitmap_offset = 0; | 627 | mddev->bitmap_offset = 0; |
| 628 | mddev->default_bitmap_offset = MD_SB_BYTES >> 9; | ||
| 626 | 629 | ||
| 627 | if (sb->state & (1<<MD_SB_CLEAN)) | 630 | if (sb->state & (1<<MD_SB_CLEAN)) |
| 628 | mddev->recovery_cp = MaxSector; | 631 | mddev->recovery_cp = MaxSector; |
| @@ -643,12 +646,12 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
| 643 | 646 | ||
| 644 | if (sb->state & (1<<MD_SB_BITMAP_PRESENT) && | 647 | if (sb->state & (1<<MD_SB_BITMAP_PRESENT) && |
| 645 | mddev->bitmap_file == NULL) { | 648 | mddev->bitmap_file == NULL) { |
| 646 | if (mddev->level != 1) { | 649 | if (mddev->level != 1 && mddev->level != 5 && mddev->level != 6) { |
| 647 | /* FIXME use a better test */ | 650 | /* FIXME use a better test */ |
| 648 | printk(KERN_WARNING "md: bitmaps only support for raid1\n"); | 651 | printk(KERN_WARNING "md: bitmaps only support for raid1\n"); |
| 649 | return -EINVAL; | 652 | return -EINVAL; |
| 650 | } | 653 | } |
| 651 | mddev->bitmap_offset = (MD_SB_BYTES >> 9); | 654 | mddev->bitmap_offset = mddev->default_bitmap_offset; |
| 652 | } | 655 | } |
| 653 | 656 | ||
| 654 | } else if (mddev->pers == NULL) { | 657 | } else if (mddev->pers == NULL) { |
| @@ -669,6 +672,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
| 669 | 672 | ||
| 670 | if (mddev->level != LEVEL_MULTIPATH) { | 673 | if (mddev->level != LEVEL_MULTIPATH) { |
| 671 | rdev->faulty = 0; | 674 | rdev->faulty = 0; |
| 675 | rdev->flags = 0; | ||
| 672 | desc = sb->disks + rdev->desc_nr; | 676 | desc = sb->disks + rdev->desc_nr; |
| 673 | 677 | ||
| 674 | if (desc->state & (1<<MD_DISK_FAULTY)) | 678 | if (desc->state & (1<<MD_DISK_FAULTY)) |
| @@ -678,6 +682,8 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
| 678 | rdev->in_sync = 1; | 682 | rdev->in_sync = 1; |
| 679 | rdev->raid_disk = desc->raid_disk; | 683 | rdev->raid_disk = desc->raid_disk; |
| 680 | } | 684 | } |
| 685 | if (desc->state & (1<<MD_DISK_WRITEMOSTLY)) | ||
| 686 | set_bit(WriteMostly, &rdev->flags); | ||
| 681 | } else /* MULTIPATH are always insync */ | 687 | } else /* MULTIPATH are always insync */ |
| 682 | rdev->in_sync = 1; | 688 | rdev->in_sync = 1; |
| 683 | return 0; | 689 | return 0; |
| @@ -706,6 +712,8 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
| 706 | int i; | 712 | int i; |
| 707 | int active=0, working=0,failed=0,spare=0,nr_disks=0; | 713 | int active=0, working=0,failed=0,spare=0,nr_disks=0; |
| 708 | 714 | ||
| 715 | rdev->sb_size = MD_SB_BYTES; | ||
| 716 | |||
| 709 | sb = (mdp_super_t*)page_address(rdev->sb_page); | 717 | sb = (mdp_super_t*)page_address(rdev->sb_page); |
| 710 | 718 | ||
| 711 | memset(sb, 0, sizeof(*sb)); | 719 | memset(sb, 0, sizeof(*sb)); |
| @@ -776,6 +784,8 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
| 776 | spare++; | 784 | spare++; |
| 777 | working++; | 785 | working++; |
| 778 | } | 786 | } |
| 787 | if (test_bit(WriteMostly, &rdev2->flags)) | ||
| 788 | d->state |= (1<<MD_DISK_WRITEMOSTLY); | ||
| 779 | } | 789 | } |
| 780 | 790 | ||
| 781 | /* now set the "removed" and "faulty" bits on any missing devices */ | 791 | /* now set the "removed" and "faulty" bits on any missing devices */ |
| @@ -831,6 +841,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) | |||
| 831 | int ret; | 841 | int ret; |
| 832 | sector_t sb_offset; | 842 | sector_t sb_offset; |
| 833 | char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE]; | 843 | char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE]; |
| 844 | int bmask; | ||
| 834 | 845 | ||
| 835 | /* | 846 | /* |
| 836 | * Calculate the position of the superblock. | 847 | * Calculate the position of the superblock. |
| @@ -859,7 +870,10 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) | |||
| 859 | } | 870 | } |
| 860 | rdev->sb_offset = sb_offset; | 871 | rdev->sb_offset = sb_offset; |
| 861 | 872 | ||
| 862 | ret = read_disk_sb(rdev); | 873 | /* superblock is rarely larger than 1K, but it can be larger, |
| 874 | * and it is safe to read 4k, so we do that | ||
| 875 | */ | ||
| 876 | ret = read_disk_sb(rdev, 4096); | ||
| 863 | if (ret) return ret; | 877 | if (ret) return ret; |
| 864 | 878 | ||
| 865 | 879 | ||
| @@ -869,7 +883,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) | |||
| 869 | sb->major_version != cpu_to_le32(1) || | 883 | sb->major_version != cpu_to_le32(1) || |
| 870 | le32_to_cpu(sb->max_dev) > (4096-256)/2 || | 884 | le32_to_cpu(sb->max_dev) > (4096-256)/2 || |
| 871 | le64_to_cpu(sb->super_offset) != (rdev->sb_offset<<1) || | 885 | le64_to_cpu(sb->super_offset) != (rdev->sb_offset<<1) || |
| 872 | sb->feature_map != 0) | 886 | (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0) |
| 873 | return -EINVAL; | 887 | return -EINVAL; |
| 874 | 888 | ||
| 875 | if (calc_sb_1_csum(sb) != sb->sb_csum) { | 889 | if (calc_sb_1_csum(sb) != sb->sb_csum) { |
| @@ -885,6 +899,11 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) | |||
| 885 | rdev->preferred_minor = 0xffff; | 899 | rdev->preferred_minor = 0xffff; |
| 886 | rdev->data_offset = le64_to_cpu(sb->data_offset); | 900 | rdev->data_offset = le64_to_cpu(sb->data_offset); |
| 887 | 901 | ||
| 902 | rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256; | ||
| 903 | bmask = queue_hardsect_size(rdev->bdev->bd_disk->queue)-1; | ||
| 904 | if (rdev->sb_size & bmask) | ||
| 905 | rdev-> sb_size = (rdev->sb_size | bmask)+1; | ||
| 906 | |||
| 888 | if (refdev == 0) | 907 | if (refdev == 0) |
| 889 | return 1; | 908 | return 1; |
| 890 | else { | 909 | else { |
| @@ -939,13 +958,15 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
| 939 | mddev->size = le64_to_cpu(sb->size)/2; | 958 | mddev->size = le64_to_cpu(sb->size)/2; |
| 940 | mddev->events = le64_to_cpu(sb->events); | 959 | mddev->events = le64_to_cpu(sb->events); |
| 941 | mddev->bitmap_offset = 0; | 960 | mddev->bitmap_offset = 0; |
| 961 | mddev->default_bitmap_offset = 0; | ||
| 962 | mddev->default_bitmap_offset = 1024; | ||
| 942 | 963 | ||
| 943 | mddev->recovery_cp = le64_to_cpu(sb->resync_offset); | 964 | mddev->recovery_cp = le64_to_cpu(sb->resync_offset); |
| 944 | memcpy(mddev->uuid, sb->set_uuid, 16); | 965 | memcpy(mddev->uuid, sb->set_uuid, 16); |
| 945 | 966 | ||
| 946 | mddev->max_disks = (4096-256)/2; | 967 | mddev->max_disks = (4096-256)/2; |
| 947 | 968 | ||
| 948 | if ((le32_to_cpu(sb->feature_map) & 1) && | 969 | if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) && |
| 949 | mddev->bitmap_file == NULL ) { | 970 | mddev->bitmap_file == NULL ) { |
| 950 | if (mddev->level != 1) { | 971 | if (mddev->level != 1) { |
| 951 | printk(KERN_WARNING "md: bitmaps only supported for raid1\n"); | 972 | printk(KERN_WARNING "md: bitmaps only supported for raid1\n"); |
| @@ -986,6 +1007,9 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
| 986 | rdev->raid_disk = role; | 1007 | rdev->raid_disk = role; |
| 987 | break; | 1008 | break; |
| 988 | } | 1009 | } |
| 1010 | rdev->flags = 0; | ||
| 1011 | if (sb->devflags & WriteMostly1) | ||
| 1012 | set_bit(WriteMostly, &rdev->flags); | ||
| 989 | } else /* MULTIPATH are always insync */ | 1013 | } else /* MULTIPATH are always insync */ |
| 990 | rdev->in_sync = 1; | 1014 | rdev->in_sync = 1; |
| 991 | 1015 | ||
| @@ -1017,7 +1041,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
| 1017 | 1041 | ||
| 1018 | if (mddev->bitmap && mddev->bitmap_file == NULL) { | 1042 | if (mddev->bitmap && mddev->bitmap_file == NULL) { |
| 1019 | sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset); | 1043 | sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset); |
| 1020 | sb->feature_map = cpu_to_le32(1); | 1044 | sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET); |
| 1021 | } | 1045 | } |
| 1022 | 1046 | ||
| 1023 | max_dev = 0; | 1047 | max_dev = 0; |
| @@ -1363,7 +1387,7 @@ repeat: | |||
| 1363 | dprintk("%s ", bdevname(rdev->bdev,b)); | 1387 | dprintk("%s ", bdevname(rdev->bdev,b)); |
| 1364 | if (!rdev->faulty) { | 1388 | if (!rdev->faulty) { |
| 1365 | md_super_write(mddev,rdev, | 1389 | md_super_write(mddev,rdev, |
| 1366 | rdev->sb_offset<<1, MD_SB_BYTES, | 1390 | rdev->sb_offset<<1, rdev->sb_size, |
| 1367 | rdev->sb_page); | 1391 | rdev->sb_page); |
| 1368 | dprintk(KERN_INFO "(write) %s's sb offset: %llu\n", | 1392 | dprintk(KERN_INFO "(write) %s's sb offset: %llu\n", |
| 1369 | bdevname(rdev->bdev,b), | 1393 | bdevname(rdev->bdev,b), |
| @@ -2073,6 +2097,8 @@ static int get_array_info(mddev_t * mddev, void __user * arg) | |||
| 2073 | info.state = 0; | 2097 | info.state = 0; |
| 2074 | if (mddev->in_sync) | 2098 | if (mddev->in_sync) |
| 2075 | info.state = (1<<MD_SB_CLEAN); | 2099 | info.state = (1<<MD_SB_CLEAN); |
| 2100 | if (mddev->bitmap && mddev->bitmap_offset) | ||
| 2101 | info.state = (1<<MD_SB_BITMAP_PRESENT); | ||
| 2076 | info.active_disks = active; | 2102 | info.active_disks = active; |
| 2077 | info.working_disks = working; | 2103 | info.working_disks = working; |
| 2078 | info.failed_disks = failed; | 2104 | info.failed_disks = failed; |
| @@ -2087,7 +2113,7 @@ static int get_array_info(mddev_t * mddev, void __user * arg) | |||
| 2087 | return 0; | 2113 | return 0; |
| 2088 | } | 2114 | } |
| 2089 | 2115 | ||
| 2090 | static int get_bitmap_file(mddev_t * mddev, void * arg) | 2116 | static int get_bitmap_file(mddev_t * mddev, void __user * arg) |
| 2091 | { | 2117 | { |
| 2092 | mdu_bitmap_file_t *file = NULL; /* too big for stack allocation */ | 2118 | mdu_bitmap_file_t *file = NULL; /* too big for stack allocation */ |
| 2093 | char *ptr, *buf = NULL; | 2119 | char *ptr, *buf = NULL; |
| @@ -2146,6 +2172,8 @@ static int get_disk_info(mddev_t * mddev, void __user * arg) | |||
| 2146 | info.state |= (1<<MD_DISK_ACTIVE); | 2172 | info.state |= (1<<MD_DISK_ACTIVE); |
| 2147 | info.state |= (1<<MD_DISK_SYNC); | 2173 | info.state |= (1<<MD_DISK_SYNC); |
| 2148 | } | 2174 | } |
| 2175 | if (test_bit(WriteMostly, &rdev->flags)) | ||
| 2176 | info.state |= (1<<MD_DISK_WRITEMOSTLY); | ||
| 2149 | } else { | 2177 | } else { |
| 2150 | info.major = info.minor = 0; | 2178 | info.major = info.minor = 0; |
| 2151 | info.raid_disk = -1; | 2179 | info.raid_disk = -1; |
| @@ -2210,8 +2238,11 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) | |||
| 2210 | mdname(mddev)); | 2238 | mdname(mddev)); |
| 2211 | return -EINVAL; | 2239 | return -EINVAL; |
| 2212 | } | 2240 | } |
| 2213 | rdev = md_import_device(dev, mddev->major_version, | 2241 | if (mddev->persistent) |
| 2214 | mddev->minor_version); | 2242 | rdev = md_import_device(dev, mddev->major_version, |
| 2243 | mddev->minor_version); | ||
| 2244 | else | ||
| 2245 | rdev = md_import_device(dev, -1, -1); | ||
| 2215 | if (IS_ERR(rdev)) { | 2246 | if (IS_ERR(rdev)) { |
| 2216 | printk(KERN_WARNING | 2247 | printk(KERN_WARNING |
| 2217 | "md: md_import_device returned %ld\n", | 2248 | "md: md_import_device returned %ld\n", |
| @@ -2231,6 +2262,9 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) | |||
| 2231 | rdev->saved_raid_disk = rdev->raid_disk; | 2262 | rdev->saved_raid_disk = rdev->raid_disk; |
| 2232 | 2263 | ||
| 2233 | rdev->in_sync = 0; /* just to be sure */ | 2264 | rdev->in_sync = 0; /* just to be sure */ |
| 2265 | if (info->state & (1<<MD_DISK_WRITEMOSTLY)) | ||
| 2266 | set_bit(WriteMostly, &rdev->flags); | ||
| 2267 | |||
| 2234 | rdev->raid_disk = -1; | 2268 | rdev->raid_disk = -1; |
| 2235 | err = bind_rdev_to_array(rdev, mddev); | 2269 | err = bind_rdev_to_array(rdev, mddev); |
| 2236 | if (err) | 2270 | if (err) |
| @@ -2271,6 +2305,9 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) | |||
| 2271 | else | 2305 | else |
| 2272 | rdev->in_sync = 0; | 2306 | rdev->in_sync = 0; |
| 2273 | 2307 | ||
| 2308 | if (info->state & (1<<MD_DISK_WRITEMOSTLY)) | ||
| 2309 | set_bit(WriteMostly, &rdev->flags); | ||
| 2310 | |||
| 2274 | err = bind_rdev_to_array(rdev, mddev); | 2311 | err = bind_rdev_to_array(rdev, mddev); |
| 2275 | if (err) { | 2312 | if (err) { |
| 2276 | export_rdev(rdev); | 2313 | export_rdev(rdev); |
| @@ -2430,25 +2467,51 @@ static int set_bitmap_file(mddev_t *mddev, int fd) | |||
| 2430 | { | 2467 | { |
| 2431 | int err; | 2468 | int err; |
| 2432 | 2469 | ||
| 2433 | if (mddev->pers) | 2470 | if (mddev->pers) { |
| 2434 | return -EBUSY; | 2471 | if (!mddev->pers->quiesce) |
| 2472 | return -EBUSY; | ||
| 2473 | if (mddev->recovery || mddev->sync_thread) | ||
| 2474 | return -EBUSY; | ||
| 2475 | /* we should be able to change the bitmap.. */ | ||
| 2476 | } | ||
| 2435 | 2477 | ||
| 2436 | mddev->bitmap_file = fget(fd); | ||
| 2437 | 2478 | ||
| 2438 | if (mddev->bitmap_file == NULL) { | 2479 | if (fd >= 0) { |
| 2439 | printk(KERN_ERR "%s: error: failed to get bitmap file\n", | 2480 | if (mddev->bitmap) |
| 2440 | mdname(mddev)); | 2481 | return -EEXIST; /* cannot add when bitmap is present */ |
| 2441 | return -EBADF; | 2482 | mddev->bitmap_file = fget(fd); |
| 2442 | } | ||
| 2443 | 2483 | ||
| 2444 | err = deny_bitmap_write_access(mddev->bitmap_file); | 2484 | if (mddev->bitmap_file == NULL) { |
| 2445 | if (err) { | 2485 | printk(KERN_ERR "%s: error: failed to get bitmap file\n", |
| 2446 | printk(KERN_ERR "%s: error: bitmap file is already in use\n", | 2486 | mdname(mddev)); |
| 2447 | mdname(mddev)); | 2487 | return -EBADF; |
| 2448 | fput(mddev->bitmap_file); | 2488 | } |
| 2449 | mddev->bitmap_file = NULL; | 2489 | |
| 2450 | } else | 2490 | err = deny_bitmap_write_access(mddev->bitmap_file); |
| 2491 | if (err) { | ||
| 2492 | printk(KERN_ERR "%s: error: bitmap file is already in use\n", | ||
| 2493 | mdname(mddev)); | ||
| 2494 | fput(mddev->bitmap_file); | ||
| 2495 | mddev->bitmap_file = NULL; | ||
| 2496 | return err; | ||
| 2497 | } | ||
| 2451 | mddev->bitmap_offset = 0; /* file overrides offset */ | 2498 | mddev->bitmap_offset = 0; /* file overrides offset */ |
| 2499 | } else if (mddev->bitmap == NULL) | ||
| 2500 | return -ENOENT; /* cannot remove what isn't there */ | ||
| 2501 | err = 0; | ||
| 2502 | if (mddev->pers) { | ||
| 2503 | mddev->pers->quiesce(mddev, 1); | ||
| 2504 | if (fd >= 0) | ||
| 2505 | err = bitmap_create(mddev); | ||
| 2506 | if (fd < 0 || err) | ||
| 2507 | bitmap_destroy(mddev); | ||
| 2508 | mddev->pers->quiesce(mddev, 0); | ||
| 2509 | } else if (fd < 0) { | ||
| 2510 | if (mddev->bitmap_file) | ||
| 2511 | fput(mddev->bitmap_file); | ||
| 2512 | mddev->bitmap_file = NULL; | ||
| 2513 | } | ||
| 2514 | |||
| 2452 | return err; | 2515 | return err; |
| 2453 | } | 2516 | } |
| 2454 | 2517 | ||
| @@ -2528,6 +2591,11 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info) | |||
| 2528 | { | 2591 | { |
| 2529 | int rv = 0; | 2592 | int rv = 0; |
| 2530 | int cnt = 0; | 2593 | int cnt = 0; |
| 2594 | int state = 0; | ||
| 2595 | |||
| 2596 | /* calculate expected state,ignoring low bits */ | ||
| 2597 | if (mddev->bitmap && mddev->bitmap_offset) | ||
| 2598 | state |= (1 << MD_SB_BITMAP_PRESENT); | ||
| 2531 | 2599 | ||
| 2532 | if (mddev->major_version != info->major_version || | 2600 | if (mddev->major_version != info->major_version || |
| 2533 | mddev->minor_version != info->minor_version || | 2601 | mddev->minor_version != info->minor_version || |
| @@ -2536,12 +2604,16 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info) | |||
| 2536 | mddev->level != info->level || | 2604 | mddev->level != info->level || |
| 2537 | /* mddev->layout != info->layout || */ | 2605 | /* mddev->layout != info->layout || */ |
| 2538 | !mddev->persistent != info->not_persistent|| | 2606 | !mddev->persistent != info->not_persistent|| |
| 2539 | mddev->chunk_size != info->chunk_size ) | 2607 | mddev->chunk_size != info->chunk_size || |
| 2608 | /* ignore bottom 8 bits of state, and allow SB_BITMAP_PRESENT to change */ | ||
| 2609 | ((state^info->state) & 0xfffffe00) | ||
| 2610 | ) | ||
| 2540 | return -EINVAL; | 2611 | return -EINVAL; |
| 2541 | /* Check there is only one change */ | 2612 | /* Check there is only one change */ |
| 2542 | if (mddev->size != info->size) cnt++; | 2613 | if (mddev->size != info->size) cnt++; |
| 2543 | if (mddev->raid_disks != info->raid_disks) cnt++; | 2614 | if (mddev->raid_disks != info->raid_disks) cnt++; |
| 2544 | if (mddev->layout != info->layout) cnt++; | 2615 | if (mddev->layout != info->layout) cnt++; |
| 2616 | if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) cnt++; | ||
| 2545 | if (cnt == 0) return 0; | 2617 | if (cnt == 0) return 0; |
| 2546 | if (cnt > 1) return -EINVAL; | 2618 | if (cnt > 1) return -EINVAL; |
| 2547 | 2619 | ||
| @@ -2620,6 +2692,35 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info) | |||
| 2620 | } | 2692 | } |
| 2621 | } | 2693 | } |
| 2622 | } | 2694 | } |
| 2695 | if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) { | ||
| 2696 | if (mddev->pers->quiesce == NULL) | ||
| 2697 | return -EINVAL; | ||
| 2698 | if (mddev->recovery || mddev->sync_thread) | ||
| 2699 | return -EBUSY; | ||
| 2700 | if (info->state & (1<<MD_SB_BITMAP_PRESENT)) { | ||
| 2701 | /* add the bitmap */ | ||
| 2702 | if (mddev->bitmap) | ||
| 2703 | return -EEXIST; | ||
| 2704 | if (mddev->default_bitmap_offset == 0) | ||
| 2705 | return -EINVAL; | ||
| 2706 | mddev->bitmap_offset = mddev->default_bitmap_offset; | ||
| 2707 | mddev->pers->quiesce(mddev, 1); | ||
| 2708 | rv = bitmap_create(mddev); | ||
| 2709 | if (rv) | ||
| 2710 | bitmap_destroy(mddev); | ||
| 2711 | mddev->pers->quiesce(mddev, 0); | ||
| 2712 | } else { | ||
| 2713 | /* remove the bitmap */ | ||
| 2714 | if (!mddev->bitmap) | ||
| 2715 | return -ENOENT; | ||
| 2716 | if (mddev->bitmap->file) | ||
| 2717 | return -EINVAL; | ||
| 2718 | mddev->pers->quiesce(mddev, 1); | ||
| 2719 | bitmap_destroy(mddev); | ||
| 2720 | mddev->pers->quiesce(mddev, 0); | ||
| 2721 | mddev->bitmap_offset = 0; | ||
| 2722 | } | ||
| 2723 | } | ||
| 2623 | md_update_sb(mddev); | 2724 | md_update_sb(mddev); |
| 2624 | return rv; | 2725 | return rv; |
| 2625 | } | 2726 | } |
| @@ -2781,7 +2882,7 @@ static int md_ioctl(struct inode *inode, struct file *file, | |||
| 2781 | goto done_unlock; | 2882 | goto done_unlock; |
| 2782 | 2883 | ||
| 2783 | case GET_BITMAP_FILE: | 2884 | case GET_BITMAP_FILE: |
| 2784 | err = get_bitmap_file(mddev, (void *)arg); | 2885 | err = get_bitmap_file(mddev, argp); |
| 2785 | goto done_unlock; | 2886 | goto done_unlock; |
| 2786 | 2887 | ||
| 2787 | case GET_DISK_INFO: | 2888 | case GET_DISK_INFO: |
| @@ -2950,18 +3051,6 @@ static int md_thread(void * arg) | |||
| 2950 | { | 3051 | { |
| 2951 | mdk_thread_t *thread = arg; | 3052 | mdk_thread_t *thread = arg; |
| 2952 | 3053 | ||
| 2953 | lock_kernel(); | ||
| 2954 | |||
| 2955 | /* | ||
| 2956 | * Detach thread | ||
| 2957 | */ | ||
| 2958 | |||
| 2959 | daemonize(thread->name, mdname(thread->mddev)); | ||
| 2960 | |||
| 2961 | current->exit_signal = SIGCHLD; | ||
| 2962 | allow_signal(SIGKILL); | ||
| 2963 | thread->tsk = current; | ||
| 2964 | |||
| 2965 | /* | 3054 | /* |
| 2966 | * md_thread is a 'system-thread', it's priority should be very | 3055 | * md_thread is a 'system-thread', it's priority should be very |
| 2967 | * high. We avoid resource deadlocks individually in each | 3056 | * high. We avoid resource deadlocks individually in each |
| @@ -2973,14 +3062,14 @@ static int md_thread(void * arg) | |||
| 2973 | * bdflush, otherwise bdflush will deadlock if there are too | 3062 | * bdflush, otherwise bdflush will deadlock if there are too |
| 2974 | * many dirty RAID5 blocks. | 3063 | * many dirty RAID5 blocks. |
| 2975 | */ | 3064 | */ |
| 2976 | unlock_kernel(); | ||
| 2977 | 3065 | ||
| 2978 | complete(thread->event); | 3066 | complete(thread->event); |
| 2979 | while (thread->run) { | 3067 | while (!kthread_should_stop()) { |
| 2980 | void (*run)(mddev_t *); | 3068 | void (*run)(mddev_t *); |
| 2981 | 3069 | ||
| 2982 | wait_event_interruptible_timeout(thread->wqueue, | 3070 | wait_event_interruptible_timeout(thread->wqueue, |
| 2983 | test_bit(THREAD_WAKEUP, &thread->flags), | 3071 | test_bit(THREAD_WAKEUP, &thread->flags) |
| 3072 | || kthread_should_stop(), | ||
| 2984 | thread->timeout); | 3073 | thread->timeout); |
| 2985 | try_to_freeze(); | 3074 | try_to_freeze(); |
| 2986 | 3075 | ||
| @@ -2989,11 +3078,8 @@ static int md_thread(void * arg) | |||
| 2989 | run = thread->run; | 3078 | run = thread->run; |
| 2990 | if (run) | 3079 | if (run) |
| 2991 | run(thread->mddev); | 3080 | run(thread->mddev); |
| 2992 | |||
| 2993 | if (signal_pending(current)) | ||
| 2994 | flush_signals(current); | ||
| 2995 | } | 3081 | } |
| 2996 | complete(thread->event); | 3082 | |
| 2997 | return 0; | 3083 | return 0; |
| 2998 | } | 3084 | } |
| 2999 | 3085 | ||
| @@ -3010,11 +3096,9 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev, | |||
| 3010 | const char *name) | 3096 | const char *name) |
| 3011 | { | 3097 | { |
| 3012 | mdk_thread_t *thread; | 3098 | mdk_thread_t *thread; |
| 3013 | int ret; | ||
| 3014 | struct completion event; | 3099 | struct completion event; |
| 3015 | 3100 | ||
| 3016 | thread = (mdk_thread_t *) kmalloc | 3101 | thread = kmalloc(sizeof(mdk_thread_t), GFP_KERNEL); |
| 3017 | (sizeof(mdk_thread_t), GFP_KERNEL); | ||
| 3018 | if (!thread) | 3102 | if (!thread) |
| 3019 | return NULL; | 3103 | return NULL; |
| 3020 | 3104 | ||
| @@ -3027,8 +3111,8 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev, | |||
| 3027 | thread->mddev = mddev; | 3111 | thread->mddev = mddev; |
| 3028 | thread->name = name; | 3112 | thread->name = name; |
| 3029 | thread->timeout = MAX_SCHEDULE_TIMEOUT; | 3113 | thread->timeout = MAX_SCHEDULE_TIMEOUT; |
| 3030 | ret = kernel_thread(md_thread, thread, 0); | 3114 | thread->tsk = kthread_run(md_thread, thread, mdname(thread->mddev)); |
| 3031 | if (ret < 0) { | 3115 | if (IS_ERR(thread->tsk)) { |
| 3032 | kfree(thread); | 3116 | kfree(thread); |
| 3033 | return NULL; | 3117 | return NULL; |
| 3034 | } | 3118 | } |
| @@ -3038,21 +3122,9 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev, | |||
| 3038 | 3122 | ||
| 3039 | void md_unregister_thread(mdk_thread_t *thread) | 3123 | void md_unregister_thread(mdk_thread_t *thread) |
| 3040 | { | 3124 | { |
| 3041 | struct completion event; | ||
| 3042 | |||
| 3043 | init_completion(&event); | ||
| 3044 | |||
| 3045 | thread->event = &event; | ||
| 3046 | |||
| 3047 | /* As soon as ->run is set to NULL, the task could disappear, | ||
| 3048 | * so we need to hold tasklist_lock until we have sent the signal | ||
| 3049 | */ | ||
| 3050 | dprintk("interrupting MD-thread pid %d\n", thread->tsk->pid); | 3125 | dprintk("interrupting MD-thread pid %d\n", thread->tsk->pid); |
| 3051 | read_lock(&tasklist_lock); | 3126 | |
| 3052 | thread->run = NULL; | 3127 | kthread_stop(thread->tsk); |
| 3053 | send_sig(SIGKILL, thread->tsk, 1); | ||
| 3054 | read_unlock(&tasklist_lock); | ||
| 3055 | wait_for_completion(&event); | ||
| 3056 | kfree(thread); | 3128 | kfree(thread); |
| 3057 | } | 3129 | } |
| 3058 | 3130 | ||
| @@ -3259,10 +3331,13 @@ static int md_seq_show(struct seq_file *seq, void *v) | |||
| 3259 | char b[BDEVNAME_SIZE]; | 3331 | char b[BDEVNAME_SIZE]; |
| 3260 | seq_printf(seq, " %s[%d]", | 3332 | seq_printf(seq, " %s[%d]", |
| 3261 | bdevname(rdev->bdev,b), rdev->desc_nr); | 3333 | bdevname(rdev->bdev,b), rdev->desc_nr); |
| 3334 | if (test_bit(WriteMostly, &rdev->flags)) | ||
| 3335 | seq_printf(seq, "(W)"); | ||
| 3262 | if (rdev->faulty) { | 3336 | if (rdev->faulty) { |
| 3263 | seq_printf(seq, "(F)"); | 3337 | seq_printf(seq, "(F)"); |
| 3264 | continue; | 3338 | continue; |
| 3265 | } | 3339 | } else if (rdev->raid_disk < 0) |
| 3340 | seq_printf(seq, "(S)"); /* spare */ | ||
| 3266 | size += rdev->size; | 3341 | size += rdev->size; |
| 3267 | } | 3342 | } |
| 3268 | 3343 | ||
| @@ -3274,6 +3349,15 @@ static int md_seq_show(struct seq_file *seq, void *v) | |||
| 3274 | seq_printf(seq, "\n %llu blocks", | 3349 | seq_printf(seq, "\n %llu blocks", |
| 3275 | (unsigned long long)size); | 3350 | (unsigned long long)size); |
| 3276 | } | 3351 | } |
| 3352 | if (mddev->persistent) { | ||
| 3353 | if (mddev->major_version != 0 || | ||
| 3354 | mddev->minor_version != 90) { | ||
| 3355 | seq_printf(seq," super %d.%d", | ||
| 3356 | mddev->major_version, | ||
| 3357 | mddev->minor_version); | ||
| 3358 | } | ||
| 3359 | } else | ||
| 3360 | seq_printf(seq, " super non-persistent"); | ||
| 3277 | 3361 | ||
| 3278 | if (mddev->pers) { | 3362 | if (mddev->pers) { |
| 3279 | mddev->pers->status (seq, mddev); | 3363 | mddev->pers->status (seq, mddev); |
| @@ -3416,7 +3500,6 @@ void md_done_sync(mddev_t *mddev, int blocks, int ok) | |||
| 3416 | */ | 3500 | */ |
| 3417 | void md_write_start(mddev_t *mddev, struct bio *bi) | 3501 | void md_write_start(mddev_t *mddev, struct bio *bi) |
| 3418 | { | 3502 | { |
| 3419 | DEFINE_WAIT(w); | ||
| 3420 | if (bio_data_dir(bi) != WRITE) | 3503 | if (bio_data_dir(bi) != WRITE) |
| 3421 | return; | 3504 | return; |
| 3422 | 3505 | ||
| @@ -3533,7 +3616,7 @@ static void md_do_sync(mddev_t *mddev) | |||
| 3533 | printk(KERN_INFO "md: syncing RAID array %s\n", mdname(mddev)); | 3616 | printk(KERN_INFO "md: syncing RAID array %s\n", mdname(mddev)); |
| 3534 | printk(KERN_INFO "md: minimum _guaranteed_ reconstruction speed:" | 3617 | printk(KERN_INFO "md: minimum _guaranteed_ reconstruction speed:" |
| 3535 | " %d KB/sec/disc.\n", sysctl_speed_limit_min); | 3618 | " %d KB/sec/disc.\n", sysctl_speed_limit_min); |
| 3536 | printk(KERN_INFO "md: using maximum available idle IO bandwith " | 3619 | printk(KERN_INFO "md: using maximum available idle IO bandwidth " |
| 3537 | "(but not more than %d KB/sec) for reconstruction.\n", | 3620 | "(but not more than %d KB/sec) for reconstruction.\n", |
| 3538 | sysctl_speed_limit_max); | 3621 | sysctl_speed_limit_max); |
| 3539 | 3622 | ||
