about | summary | refs | log | tree | commit | diff | stats
path: root/drivers/md/md.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r-- drivers/md/md.c 227
1 files changed, 155 insertions, 72 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 20ca80b7dc20..2897df90df44 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -34,6 +34,7 @@
34 34
35#include <linux/module.h> 35#include <linux/module.h>
36#include <linux/config.h> 36#include <linux/config.h>
37#include <linux/kthread.h>
37#include <linux/linkage.h> 38#include <linux/linkage.h>
38#include <linux/raid/md.h> 39#include <linux/raid/md.h>
39#include <linux/raid/bitmap.h> 40#include <linux/raid/bitmap.h>
@@ -73,7 +74,7 @@ static DEFINE_SPINLOCK(pers_lock);
73 * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit' 74 * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit'
74 * is 1000 KB/sec, so the extra system load does not show up that much. 75 * is 1000 KB/sec, so the extra system load does not show up that much.
75 * Increase it if you want to have more _guaranteed_ speed. Note that 76 * Increase it if you want to have more _guaranteed_ speed. Note that
76 * the RAID driver will use the maximum available bandwith if the IO 77 * the RAID driver will use the maximum available bandwidth if the IO
77 * subsystem is idle. There is also an 'absolute maximum' reconstruction 78 * subsystem is idle. There is also an 'absolute maximum' reconstruction
78 * speed limit - in case reconstruction slows down your system despite 79 * speed limit - in case reconstruction slows down your system despite
79 * idle IO detection. 80 * idle IO detection.
@@ -393,7 +394,7 @@ int sync_page_io(struct block_device *bdev, sector_t sector, int size,
393 return ret; 394 return ret;
394} 395}
395 396
396static int read_disk_sb(mdk_rdev_t * rdev) 397static int read_disk_sb(mdk_rdev_t * rdev, int size)
397{ 398{
398 char b[BDEVNAME_SIZE]; 399 char b[BDEVNAME_SIZE];
399 if (!rdev->sb_page) { 400 if (!rdev->sb_page) {
@@ -404,7 +405,7 @@ static int read_disk_sb(mdk_rdev_t * rdev)
404 return 0; 405 return 0;
405 406
406 407
407 if (!sync_page_io(rdev->bdev, rdev->sb_offset<<1, MD_SB_BYTES, rdev->sb_page, READ)) 408 if (!sync_page_io(rdev->bdev, rdev->sb_offset<<1, size, rdev->sb_page, READ))
408 goto fail; 409 goto fail;
409 rdev->sb_loaded = 1; 410 rdev->sb_loaded = 1;
410 return 0; 411 return 0;
@@ -531,7 +532,7 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version
531 sb_offset = calc_dev_sboffset(rdev->bdev); 532 sb_offset = calc_dev_sboffset(rdev->bdev);
532 rdev->sb_offset = sb_offset; 533 rdev->sb_offset = sb_offset;
533 534
534 ret = read_disk_sb(rdev); 535 ret = read_disk_sb(rdev, MD_SB_BYTES);
535 if (ret) return ret; 536 if (ret) return ret;
536 537
537 ret = -EINVAL; 538 ret = -EINVAL;
@@ -564,6 +565,7 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version
564 565
565 rdev->preferred_minor = sb->md_minor; 566 rdev->preferred_minor = sb->md_minor;
566 rdev->data_offset = 0; 567 rdev->data_offset = 0;
568 rdev->sb_size = MD_SB_BYTES;
567 569
568 if (sb->level == LEVEL_MULTIPATH) 570 if (sb->level == LEVEL_MULTIPATH)
569 rdev->desc_nr = -1; 571 rdev->desc_nr = -1;
@@ -623,6 +625,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
623 mddev->size = sb->size; 625 mddev->size = sb->size;
624 mddev->events = md_event(sb); 626 mddev->events = md_event(sb);
625 mddev->bitmap_offset = 0; 627 mddev->bitmap_offset = 0;
628 mddev->default_bitmap_offset = MD_SB_BYTES >> 9;
626 629
627 if (sb->state & (1<<MD_SB_CLEAN)) 630 if (sb->state & (1<<MD_SB_CLEAN))
628 mddev->recovery_cp = MaxSector; 631 mddev->recovery_cp = MaxSector;
@@ -643,12 +646,12 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
643 646
644 if (sb->state & (1<<MD_SB_BITMAP_PRESENT) && 647 if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
645 mddev->bitmap_file == NULL) { 648 mddev->bitmap_file == NULL) {
646 if (mddev->level != 1) { 649 if (mddev->level != 1 && mddev->level != 5 && mddev->level != 6) {
647 /* FIXME use a better test */ 650 /* FIXME use a better test */
648 printk(KERN_WARNING "md: bitmaps only support for raid1\n"); 651 printk(KERN_WARNING "md: bitmaps only support for raid1\n");
649 return -EINVAL; 652 return -EINVAL;
650 } 653 }
651 mddev->bitmap_offset = (MD_SB_BYTES >> 9); 654 mddev->bitmap_offset = mddev->default_bitmap_offset;
652 } 655 }
653 656
654 } else if (mddev->pers == NULL) { 657 } else if (mddev->pers == NULL) {
@@ -669,6 +672,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
669 672
670 if (mddev->level != LEVEL_MULTIPATH) { 673 if (mddev->level != LEVEL_MULTIPATH) {
671 rdev->faulty = 0; 674 rdev->faulty = 0;
675 rdev->flags = 0;
672 desc = sb->disks + rdev->desc_nr; 676 desc = sb->disks + rdev->desc_nr;
673 677
674 if (desc->state & (1<<MD_DISK_FAULTY)) 678 if (desc->state & (1<<MD_DISK_FAULTY))
@@ -678,6 +682,8 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
678 rdev->in_sync = 1; 682 rdev->in_sync = 1;
679 rdev->raid_disk = desc->raid_disk; 683 rdev->raid_disk = desc->raid_disk;
680 } 684 }
685 if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
686 set_bit(WriteMostly, &rdev->flags);
681 } else /* MULTIPATH are always insync */ 687 } else /* MULTIPATH are always insync */
682 rdev->in_sync = 1; 688 rdev->in_sync = 1;
683 return 0; 689 return 0;
@@ -706,6 +712,8 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
706 int i; 712 int i;
707 int active=0, working=0,failed=0,spare=0,nr_disks=0; 713 int active=0, working=0,failed=0,spare=0,nr_disks=0;
708 714
715 rdev->sb_size = MD_SB_BYTES;
716
709 sb = (mdp_super_t*)page_address(rdev->sb_page); 717 sb = (mdp_super_t*)page_address(rdev->sb_page);
710 718
711 memset(sb, 0, sizeof(*sb)); 719 memset(sb, 0, sizeof(*sb));
@@ -776,6 +784,8 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
776 spare++; 784 spare++;
777 working++; 785 working++;
778 } 786 }
787 if (test_bit(WriteMostly, &rdev2->flags))
788 d->state |= (1<<MD_DISK_WRITEMOSTLY);
779 } 789 }
780 790
781 /* now set the "removed" and "faulty" bits on any missing devices */ 791 /* now set the "removed" and "faulty" bits on any missing devices */
@@ -831,6 +841,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
831 int ret; 841 int ret;
832 sector_t sb_offset; 842 sector_t sb_offset;
833 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE]; 843 char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
844 int bmask;
834 845
835 /* 846 /*
836 * Calculate the position of the superblock. 847 * Calculate the position of the superblock.
@@ -859,7 +870,10 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
859 } 870 }
860 rdev->sb_offset = sb_offset; 871 rdev->sb_offset = sb_offset;
861 872
862 ret = read_disk_sb(rdev); 873 /* superblock is rarely larger than 1K, but it can be larger,
874 * and it is safe to read 4k, so we do that
875 */
876 ret = read_disk_sb(rdev, 4096);
863 if (ret) return ret; 877 if (ret) return ret;
864 878
865 879
@@ -869,7 +883,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
869 sb->major_version != cpu_to_le32(1) || 883 sb->major_version != cpu_to_le32(1) ||
870 le32_to_cpu(sb->max_dev) > (4096-256)/2 || 884 le32_to_cpu(sb->max_dev) > (4096-256)/2 ||
871 le64_to_cpu(sb->super_offset) != (rdev->sb_offset<<1) || 885 le64_to_cpu(sb->super_offset) != (rdev->sb_offset<<1) ||
872 sb->feature_map != 0) 886 (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0)
873 return -EINVAL; 887 return -EINVAL;
874 888
875 if (calc_sb_1_csum(sb) != sb->sb_csum) { 889 if (calc_sb_1_csum(sb) != sb->sb_csum) {
@@ -885,6 +899,11 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
885 rdev->preferred_minor = 0xffff; 899 rdev->preferred_minor = 0xffff;
886 rdev->data_offset = le64_to_cpu(sb->data_offset); 900 rdev->data_offset = le64_to_cpu(sb->data_offset);
887 901
902 rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
903 bmask = queue_hardsect_size(rdev->bdev->bd_disk->queue)-1;
904 if (rdev->sb_size & bmask)
905 rdev-> sb_size = (rdev->sb_size | bmask)+1;
906
888 if (refdev == 0) 907 if (refdev == 0)
889 return 1; 908 return 1;
890 else { 909 else {
@@ -939,13 +958,15 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
939 mddev->size = le64_to_cpu(sb->size)/2; 958 mddev->size = le64_to_cpu(sb->size)/2;
940 mddev->events = le64_to_cpu(sb->events); 959 mddev->events = le64_to_cpu(sb->events);
941 mddev->bitmap_offset = 0; 960 mddev->bitmap_offset = 0;
961 mddev->default_bitmap_offset = 0;
962 mddev->default_bitmap_offset = 1024;
942 963
943 mddev->recovery_cp = le64_to_cpu(sb->resync_offset); 964 mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
944 memcpy(mddev->uuid, sb->set_uuid, 16); 965 memcpy(mddev->uuid, sb->set_uuid, 16);
945 966
946 mddev->max_disks = (4096-256)/2; 967 mddev->max_disks = (4096-256)/2;
947 968
948 if ((le32_to_cpu(sb->feature_map) & 1) && 969 if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
949 mddev->bitmap_file == NULL ) { 970 mddev->bitmap_file == NULL ) {
950 if (mddev->level != 1) { 971 if (mddev->level != 1) {
951 printk(KERN_WARNING "md: bitmaps only supported for raid1\n"); 972 printk(KERN_WARNING "md: bitmaps only supported for raid1\n");
@@ -986,6 +1007,9 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
986 rdev->raid_disk = role; 1007 rdev->raid_disk = role;
987 break; 1008 break;
988 } 1009 }
1010 rdev->flags = 0;
1011 if (sb->devflags & WriteMostly1)
1012 set_bit(WriteMostly, &rdev->flags);
989 } else /* MULTIPATH are always insync */ 1013 } else /* MULTIPATH are always insync */
990 rdev->in_sync = 1; 1014 rdev->in_sync = 1;
991 1015
@@ -1017,7 +1041,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
1017 1041
1018 if (mddev->bitmap && mddev->bitmap_file == NULL) { 1042 if (mddev->bitmap && mddev->bitmap_file == NULL) {
1019 sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset); 1043 sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset);
1020 sb->feature_map = cpu_to_le32(1); 1044 sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
1021 } 1045 }
1022 1046
1023 max_dev = 0; 1047 max_dev = 0;
@@ -1363,7 +1387,7 @@ repeat:
1363 dprintk("%s ", bdevname(rdev->bdev,b)); 1387 dprintk("%s ", bdevname(rdev->bdev,b));
1364 if (!rdev->faulty) { 1388 if (!rdev->faulty) {
1365 md_super_write(mddev,rdev, 1389 md_super_write(mddev,rdev,
1366 rdev->sb_offset<<1, MD_SB_BYTES, 1390 rdev->sb_offset<<1, rdev->sb_size,
1367 rdev->sb_page); 1391 rdev->sb_page);
1368 dprintk(KERN_INFO "(write) %s's sb offset: %llu\n", 1392 dprintk(KERN_INFO "(write) %s's sb offset: %llu\n",
1369 bdevname(rdev->bdev,b), 1393 bdevname(rdev->bdev,b),
@@ -2073,6 +2097,8 @@ static int get_array_info(mddev_t * mddev, void __user * arg)
2073 info.state = 0; 2097 info.state = 0;
2074 if (mddev->in_sync) 2098 if (mddev->in_sync)
2075 info.state = (1<<MD_SB_CLEAN); 2099 info.state = (1<<MD_SB_CLEAN);
2100 if (mddev->bitmap && mddev->bitmap_offset)
2101 info.state = (1<<MD_SB_BITMAP_PRESENT);
2076 info.active_disks = active; 2102 info.active_disks = active;
2077 info.working_disks = working; 2103 info.working_disks = working;
2078 info.failed_disks = failed; 2104 info.failed_disks = failed;
@@ -2087,7 +2113,7 @@ static int get_array_info(mddev_t * mddev, void __user * arg)
2087 return 0; 2113 return 0;
2088} 2114}
2089 2115
2090static int get_bitmap_file(mddev_t * mddev, void * arg) 2116static int get_bitmap_file(mddev_t * mddev, void __user * arg)
2091{ 2117{
2092 mdu_bitmap_file_t *file = NULL; /* too big for stack allocation */ 2118 mdu_bitmap_file_t *file = NULL; /* too big for stack allocation */
2093 char *ptr, *buf = NULL; 2119 char *ptr, *buf = NULL;
@@ -2146,6 +2172,8 @@ static int get_disk_info(mddev_t * mddev, void __user * arg)
2146 info.state |= (1<<MD_DISK_ACTIVE); 2172 info.state |= (1<<MD_DISK_ACTIVE);
2147 info.state |= (1<<MD_DISK_SYNC); 2173 info.state |= (1<<MD_DISK_SYNC);
2148 } 2174 }
2175 if (test_bit(WriteMostly, &rdev->flags))
2176 info.state |= (1<<MD_DISK_WRITEMOSTLY);
2149 } else { 2177 } else {
2150 info.major = info.minor = 0; 2178 info.major = info.minor = 0;
2151 info.raid_disk = -1; 2179 info.raid_disk = -1;
@@ -2210,8 +2238,11 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
2210 mdname(mddev)); 2238 mdname(mddev));
2211 return -EINVAL; 2239 return -EINVAL;
2212 } 2240 }
2213 rdev = md_import_device(dev, mddev->major_version, 2241 if (mddev->persistent)
2214 mddev->minor_version); 2242 rdev = md_import_device(dev, mddev->major_version,
2243 mddev->minor_version);
2244 else
2245 rdev = md_import_device(dev, -1, -1);
2215 if (IS_ERR(rdev)) { 2246 if (IS_ERR(rdev)) {
2216 printk(KERN_WARNING 2247 printk(KERN_WARNING
2217 "md: md_import_device returned %ld\n", 2248 "md: md_import_device returned %ld\n",
@@ -2231,6 +2262,9 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
2231 rdev->saved_raid_disk = rdev->raid_disk; 2262 rdev->saved_raid_disk = rdev->raid_disk;
2232 2263
2233 rdev->in_sync = 0; /* just to be sure */ 2264 rdev->in_sync = 0; /* just to be sure */
2265 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
2266 set_bit(WriteMostly, &rdev->flags);
2267
2234 rdev->raid_disk = -1; 2268 rdev->raid_disk = -1;
2235 err = bind_rdev_to_array(rdev, mddev); 2269 err = bind_rdev_to_array(rdev, mddev);
2236 if (err) 2270 if (err)
@@ -2271,6 +2305,9 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
2271 else 2305 else
2272 rdev->in_sync = 0; 2306 rdev->in_sync = 0;
2273 2307
2308 if (info->state & (1<<MD_DISK_WRITEMOSTLY))
2309 set_bit(WriteMostly, &rdev->flags);
2310
2274 err = bind_rdev_to_array(rdev, mddev); 2311 err = bind_rdev_to_array(rdev, mddev);
2275 if (err) { 2312 if (err) {
2276 export_rdev(rdev); 2313 export_rdev(rdev);
@@ -2430,25 +2467,51 @@ static int set_bitmap_file(mddev_t *mddev, int fd)
2430{ 2467{
2431 int err; 2468 int err;
2432 2469
2433 if (mddev->pers) 2470 if (mddev->pers) {
2434 return -EBUSY; 2471 if (!mddev->pers->quiesce)
2472 return -EBUSY;
2473 if (mddev->recovery || mddev->sync_thread)
2474 return -EBUSY;
2475 /* we should be able to change the bitmap.. */
2476 }
2435 2477
2436 mddev->bitmap_file = fget(fd);
2437 2478
2438 if (mddev->bitmap_file == NULL) { 2479 if (fd >= 0) {
2439 printk(KERN_ERR "%s: error: failed to get bitmap file\n", 2480 if (mddev->bitmap)
2440 mdname(mddev)); 2481 return -EEXIST; /* cannot add when bitmap is present */
2441 return -EBADF; 2482 mddev->bitmap_file = fget(fd);
2442 }
2443 2483
2444 err = deny_bitmap_write_access(mddev->bitmap_file); 2484 if (mddev->bitmap_file == NULL) {
2445 if (err) { 2485 printk(KERN_ERR "%s: error: failed to get bitmap file\n",
2446 printk(KERN_ERR "%s: error: bitmap file is already in use\n", 2486 mdname(mddev));
2447 mdname(mddev)); 2487 return -EBADF;
2448 fput(mddev->bitmap_file); 2488 }
2449 mddev->bitmap_file = NULL; 2489
2450 } else 2490 err = deny_bitmap_write_access(mddev->bitmap_file);
2491 if (err) {
2492 printk(KERN_ERR "%s: error: bitmap file is already in use\n",
2493 mdname(mddev));
2494 fput(mddev->bitmap_file);
2495 mddev->bitmap_file = NULL;
2496 return err;
2497 }
2451 mddev->bitmap_offset = 0; /* file overrides offset */ 2498 mddev->bitmap_offset = 0; /* file overrides offset */
2499 } else if (mddev->bitmap == NULL)
2500 return -ENOENT; /* cannot remove what isn't there */
2501 err = 0;
2502 if (mddev->pers) {
2503 mddev->pers->quiesce(mddev, 1);
2504 if (fd >= 0)
2505 err = bitmap_create(mddev);
2506 if (fd < 0 || err)
2507 bitmap_destroy(mddev);
2508 mddev->pers->quiesce(mddev, 0);
2509 } else if (fd < 0) {
2510 if (mddev->bitmap_file)
2511 fput(mddev->bitmap_file);
2512 mddev->bitmap_file = NULL;
2513 }
2514
2452 return err; 2515 return err;
2453} 2516}
2454 2517
@@ -2528,6 +2591,11 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
2528{ 2591{
2529 int rv = 0; 2592 int rv = 0;
2530 int cnt = 0; 2593 int cnt = 0;
2594 int state = 0;
2595
2596 /* calculate expected state,ignoring low bits */
2597 if (mddev->bitmap && mddev->bitmap_offset)
2598 state |= (1 << MD_SB_BITMAP_PRESENT);
2531 2599
2532 if (mddev->major_version != info->major_version || 2600 if (mddev->major_version != info->major_version ||
2533 mddev->minor_version != info->minor_version || 2601 mddev->minor_version != info->minor_version ||
@@ -2536,12 +2604,16 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
2536 mddev->level != info->level || 2604 mddev->level != info->level ||
2537/* mddev->layout != info->layout || */ 2605/* mddev->layout != info->layout || */
2538 !mddev->persistent != info->not_persistent|| 2606 !mddev->persistent != info->not_persistent||
2539 mddev->chunk_size != info->chunk_size ) 2607 mddev->chunk_size != info->chunk_size ||
2608 /* ignore bottom 8 bits of state, and allow SB_BITMAP_PRESENT to change */
2609 ((state^info->state) & 0xfffffe00)
2610 )
2540 return -EINVAL; 2611 return -EINVAL;
2541 /* Check there is only one change */ 2612 /* Check there is only one change */
2542 if (mddev->size != info->size) cnt++; 2613 if (mddev->size != info->size) cnt++;
2543 if (mddev->raid_disks != info->raid_disks) cnt++; 2614 if (mddev->raid_disks != info->raid_disks) cnt++;
2544 if (mddev->layout != info->layout) cnt++; 2615 if (mddev->layout != info->layout) cnt++;
2616 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) cnt++;
2545 if (cnt == 0) return 0; 2617 if (cnt == 0) return 0;
2546 if (cnt > 1) return -EINVAL; 2618 if (cnt > 1) return -EINVAL;
2547 2619
@@ -2620,6 +2692,35 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
2620 } 2692 }
2621 } 2693 }
2622 } 2694 }
2695 if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
2696 if (mddev->pers->quiesce == NULL)
2697 return -EINVAL;
2698 if (mddev->recovery || mddev->sync_thread)
2699 return -EBUSY;
2700 if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
2701 /* add the bitmap */
2702 if (mddev->bitmap)
2703 return -EEXIST;
2704 if (mddev->default_bitmap_offset == 0)
2705 return -EINVAL;
2706 mddev->bitmap_offset = mddev->default_bitmap_offset;
2707 mddev->pers->quiesce(mddev, 1);
2708 rv = bitmap_create(mddev);
2709 if (rv)
2710 bitmap_destroy(mddev);
2711 mddev->pers->quiesce(mddev, 0);
2712 } else {
2713 /* remove the bitmap */
2714 if (!mddev->bitmap)
2715 return -ENOENT;
2716 if (mddev->bitmap->file)
2717 return -EINVAL;
2718 mddev->pers->quiesce(mddev, 1);
2719 bitmap_destroy(mddev);
2720 mddev->pers->quiesce(mddev, 0);
2721 mddev->bitmap_offset = 0;
2722 }
2723 }
2623 md_update_sb(mddev); 2724 md_update_sb(mddev);
2624 return rv; 2725 return rv;
2625} 2726}
@@ -2781,7 +2882,7 @@ static int md_ioctl(struct inode *inode, struct file *file,
2781 goto done_unlock; 2882 goto done_unlock;
2782 2883
2783 case GET_BITMAP_FILE: 2884 case GET_BITMAP_FILE:
2784 err = get_bitmap_file(mddev, (void *)arg); 2885 err = get_bitmap_file(mddev, argp);
2785 goto done_unlock; 2886 goto done_unlock;
2786 2887
2787 case GET_DISK_INFO: 2888 case GET_DISK_INFO:
@@ -2950,18 +3051,6 @@ static int md_thread(void * arg)
2950{ 3051{
2951 mdk_thread_t *thread = arg; 3052 mdk_thread_t *thread = arg;
2952 3053
2953 lock_kernel();
2954
2955 /*
2956 * Detach thread
2957 */
2958
2959 daemonize(thread->name, mdname(thread->mddev));
2960
2961 current->exit_signal = SIGCHLD;
2962 allow_signal(SIGKILL);
2963 thread->tsk = current;
2964
2965 /* 3054 /*
2966 * md_thread is a 'system-thread', it's priority should be very 3055 * md_thread is a 'system-thread', it's priority should be very
2967 * high. We avoid resource deadlocks individually in each 3056 * high. We avoid resource deadlocks individually in each
@@ -2973,14 +3062,14 @@ static int md_thread(void * arg)
2973 * bdflush, otherwise bdflush will deadlock if there are too 3062 * bdflush, otherwise bdflush will deadlock if there are too
2974 * many dirty RAID5 blocks. 3063 * many dirty RAID5 blocks.
2975 */ 3064 */
2976 unlock_kernel();
2977 3065
2978 complete(thread->event); 3066 complete(thread->event);
2979 while (thread->run) { 3067 while (!kthread_should_stop()) {
2980 void (*run)(mddev_t *); 3068 void (*run)(mddev_t *);
2981 3069
2982 wait_event_interruptible_timeout(thread->wqueue, 3070 wait_event_interruptible_timeout(thread->wqueue,
2983 test_bit(THREAD_WAKEUP, &thread->flags), 3071 test_bit(THREAD_WAKEUP, &thread->flags)
3072 || kthread_should_stop(),
2984 thread->timeout); 3073 thread->timeout);
2985 try_to_freeze(); 3074 try_to_freeze();
2986 3075
@@ -2989,11 +3078,8 @@ static int md_thread(void * arg)
2989 run = thread->run; 3078 run = thread->run;
2990 if (run) 3079 if (run)
2991 run(thread->mddev); 3080 run(thread->mddev);
2992
2993 if (signal_pending(current))
2994 flush_signals(current);
2995 } 3081 }
2996 complete(thread->event); 3082
2997 return 0; 3083 return 0;
2998} 3084}
2999 3085
@@ -3010,11 +3096,9 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev,
3010 const char *name) 3096 const char *name)
3011{ 3097{
3012 mdk_thread_t *thread; 3098 mdk_thread_t *thread;
3013 int ret;
3014 struct completion event; 3099 struct completion event;
3015 3100
3016 thread = (mdk_thread_t *) kmalloc 3101 thread = kmalloc(sizeof(mdk_thread_t), GFP_KERNEL);
3017 (sizeof(mdk_thread_t), GFP_KERNEL);
3018 if (!thread) 3102 if (!thread)
3019 return NULL; 3103 return NULL;
3020 3104
@@ -3027,8 +3111,8 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev,
3027 thread->mddev = mddev; 3111 thread->mddev = mddev;
3028 thread->name = name; 3112 thread->name = name;
3029 thread->timeout = MAX_SCHEDULE_TIMEOUT; 3113 thread->timeout = MAX_SCHEDULE_TIMEOUT;
3030 ret = kernel_thread(md_thread, thread, 0); 3114 thread->tsk = kthread_run(md_thread, thread, mdname(thread->mddev));
3031 if (ret < 0) { 3115 if (IS_ERR(thread->tsk)) {
3032 kfree(thread); 3116 kfree(thread);
3033 return NULL; 3117 return NULL;
3034 } 3118 }
@@ -3038,21 +3122,9 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev,
3038 3122
3039void md_unregister_thread(mdk_thread_t *thread) 3123void md_unregister_thread(mdk_thread_t *thread)
3040{ 3124{
3041 struct completion event;
3042
3043 init_completion(&event);
3044
3045 thread->event = &event;
3046
3047 /* As soon as ->run is set to NULL, the task could disappear,
3048 * so we need to hold tasklist_lock until we have sent the signal
3049 */
3050 dprintk("interrupting MD-thread pid %d\n", thread->tsk->pid); 3125 dprintk("interrupting MD-thread pid %d\n", thread->tsk->pid);
3051 read_lock(&tasklist_lock); 3126
3052 thread->run = NULL; 3127 kthread_stop(thread->tsk);
3053 send_sig(SIGKILL, thread->tsk, 1);
3054 read_unlock(&tasklist_lock);
3055 wait_for_completion(&event);
3056 kfree(thread); 3128 kfree(thread);
3057} 3129}
3058 3130
@@ -3259,10 +3331,13 @@ static int md_seq_show(struct seq_file *seq, void *v)
3259 char b[BDEVNAME_SIZE]; 3331 char b[BDEVNAME_SIZE];
3260 seq_printf(seq, " %s[%d]", 3332 seq_printf(seq, " %s[%d]",
3261 bdevname(rdev->bdev,b), rdev->desc_nr); 3333 bdevname(rdev->bdev,b), rdev->desc_nr);
3334 if (test_bit(WriteMostly, &rdev->flags))
3335 seq_printf(seq, "(W)");
3262 if (rdev->faulty) { 3336 if (rdev->faulty) {
3263 seq_printf(seq, "(F)"); 3337 seq_printf(seq, "(F)");
3264 continue; 3338 continue;
3265 } 3339 } else if (rdev->raid_disk < 0)
3340 seq_printf(seq, "(S)"); /* spare */
3266 size += rdev->size; 3341 size += rdev->size;
3267 } 3342 }
3268 3343
@@ -3274,6 +3349,15 @@ static int md_seq_show(struct seq_file *seq, void *v)
3274 seq_printf(seq, "\n %llu blocks", 3349 seq_printf(seq, "\n %llu blocks",
3275 (unsigned long long)size); 3350 (unsigned long long)size);
3276 } 3351 }
3352 if (mddev->persistent) {
3353 if (mddev->major_version != 0 ||
3354 mddev->minor_version != 90) {
3355 seq_printf(seq," super %d.%d",
3356 mddev->major_version,
3357 mddev->minor_version);
3358 }
3359 } else
3360 seq_printf(seq, " super non-persistent");
3277 3361
3278 if (mddev->pers) { 3362 if (mddev->pers) {
3279 mddev->pers->status (seq, mddev); 3363 mddev->pers->status (seq, mddev);
@@ -3416,7 +3500,6 @@ void md_done_sync(mddev_t *mddev, int blocks, int ok)
3416 */ 3500 */
3417void md_write_start(mddev_t *mddev, struct bio *bi) 3501void md_write_start(mddev_t *mddev, struct bio *bi)
3418{ 3502{
3419 DEFINE_WAIT(w);
3420 if (bio_data_dir(bi) != WRITE) 3503 if (bio_data_dir(bi) != WRITE)
3421 return; 3504 return;
3422 3505
@@ -3533,7 +3616,7 @@ static void md_do_sync(mddev_t *mddev)
3533 printk(KERN_INFO "md: syncing RAID array %s\n", mdname(mddev)); 3616 printk(KERN_INFO "md: syncing RAID array %s\n", mdname(mddev));
3534 printk(KERN_INFO "md: minimum _guaranteed_ reconstruction speed:" 3617 printk(KERN_INFO "md: minimum _guaranteed_ reconstruction speed:"
3535 " %d KB/sec/disc.\n", sysctl_speed_limit_min); 3618 " %d KB/sec/disc.\n", sysctl_speed_limit_min);
3536 printk(KERN_INFO "md: using maximum available idle IO bandwith " 3619 printk(KERN_INFO "md: using maximum available idle IO bandwidth "
3537 "(but not more than %d KB/sec) for reconstruction.\n", 3620 "(but not more than %d KB/sec) for reconstruction.\n",
3538 sysctl_speed_limit_max); 3621 sysctl_speed_limit_max);
3539 3622