diff options
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r-- | drivers/md/md.c | 615 |
1 files changed, 398 insertions, 217 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index 2580ac1b9b0f..c2ff77ccec50 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -169,7 +169,6 @@ void md_new_event(mddev_t *mddev) | |||
169 | { | 169 | { |
170 | atomic_inc(&md_event_count); | 170 | atomic_inc(&md_event_count); |
171 | wake_up(&md_event_waiters); | 171 | wake_up(&md_event_waiters); |
172 | sysfs_notify(&mddev->kobj, NULL, "sync_action"); | ||
173 | } | 172 | } |
174 | EXPORT_SYMBOL_GPL(md_new_event); | 173 | EXPORT_SYMBOL_GPL(md_new_event); |
175 | 174 | ||
@@ -274,10 +273,12 @@ static mddev_t * mddev_find(dev_t unit) | |||
274 | INIT_LIST_HEAD(&new->all_mddevs); | 273 | INIT_LIST_HEAD(&new->all_mddevs); |
275 | init_timer(&new->safemode_timer); | 274 | init_timer(&new->safemode_timer); |
276 | atomic_set(&new->active, 1); | 275 | atomic_set(&new->active, 1); |
276 | atomic_set(&new->openers, 0); | ||
277 | spin_lock_init(&new->write_lock); | 277 | spin_lock_init(&new->write_lock); |
278 | init_waitqueue_head(&new->sb_wait); | 278 | init_waitqueue_head(&new->sb_wait); |
279 | init_waitqueue_head(&new->recovery_wait); | 279 | init_waitqueue_head(&new->recovery_wait); |
280 | new->reshape_position = MaxSector; | 280 | new->reshape_position = MaxSector; |
281 | new->resync_min = 0; | ||
281 | new->resync_max = MaxSector; | 282 | new->resync_max = MaxSector; |
282 | new->level = LEVEL_NONE; | 283 | new->level = LEVEL_NONE; |
283 | 284 | ||
@@ -347,21 +348,20 @@ static struct mdk_personality *find_pers(int level, char *clevel) | |||
347 | return NULL; | 348 | return NULL; |
348 | } | 349 | } |
349 | 350 | ||
351 | /* return the offset of the super block in 512byte sectors */ | ||
350 | static inline sector_t calc_dev_sboffset(struct block_device *bdev) | 352 | static inline sector_t calc_dev_sboffset(struct block_device *bdev) |
351 | { | 353 | { |
352 | sector_t size = bdev->bd_inode->i_size >> BLOCK_SIZE_BITS; | 354 | sector_t num_sectors = bdev->bd_inode->i_size / 512; |
353 | return MD_NEW_SIZE_BLOCKS(size); | 355 | return MD_NEW_SIZE_SECTORS(num_sectors); |
354 | } | 356 | } |
355 | 357 | ||
356 | static sector_t calc_dev_size(mdk_rdev_t *rdev, unsigned chunk_size) | 358 | static sector_t calc_num_sectors(mdk_rdev_t *rdev, unsigned chunk_size) |
357 | { | 359 | { |
358 | sector_t size; | 360 | sector_t num_sectors = rdev->sb_start; |
359 | |||
360 | size = rdev->sb_offset; | ||
361 | 361 | ||
362 | if (chunk_size) | 362 | if (chunk_size) |
363 | size &= ~((sector_t)chunk_size/1024 - 1); | 363 | num_sectors &= ~((sector_t)chunk_size/512 - 1); |
364 | return size; | 364 | return num_sectors; |
365 | } | 365 | } |
366 | 366 | ||
367 | static int alloc_disk_sb(mdk_rdev_t * rdev) | 367 | static int alloc_disk_sb(mdk_rdev_t * rdev) |
@@ -372,7 +372,7 @@ static int alloc_disk_sb(mdk_rdev_t * rdev) | |||
372 | rdev->sb_page = alloc_page(GFP_KERNEL); | 372 | rdev->sb_page = alloc_page(GFP_KERNEL); |
373 | if (!rdev->sb_page) { | 373 | if (!rdev->sb_page) { |
374 | printk(KERN_ALERT "md: out of memory.\n"); | 374 | printk(KERN_ALERT "md: out of memory.\n"); |
375 | return -EINVAL; | 375 | return -ENOMEM; |
376 | } | 376 | } |
377 | 377 | ||
378 | return 0; | 378 | return 0; |
@@ -384,7 +384,7 @@ static void free_disk_sb(mdk_rdev_t * rdev) | |||
384 | put_page(rdev->sb_page); | 384 | put_page(rdev->sb_page); |
385 | rdev->sb_loaded = 0; | 385 | rdev->sb_loaded = 0; |
386 | rdev->sb_page = NULL; | 386 | rdev->sb_page = NULL; |
387 | rdev->sb_offset = 0; | 387 | rdev->sb_start = 0; |
388 | rdev->size = 0; | 388 | rdev->size = 0; |
389 | } | 389 | } |
390 | } | 390 | } |
@@ -530,7 +530,7 @@ static int read_disk_sb(mdk_rdev_t * rdev, int size) | |||
530 | return 0; | 530 | return 0; |
531 | 531 | ||
532 | 532 | ||
533 | if (!sync_page_io(rdev->bdev, rdev->sb_offset<<1, size, rdev->sb_page, READ)) | 533 | if (!sync_page_io(rdev->bdev, rdev->sb_start, size, rdev->sb_page, READ)) |
534 | goto fail; | 534 | goto fail; |
535 | rdev->sb_loaded = 1; | 535 | rdev->sb_loaded = 1; |
536 | return 0; | 536 | return 0; |
@@ -543,17 +543,12 @@ fail: | |||
543 | 543 | ||
544 | static int uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2) | 544 | static int uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2) |
545 | { | 545 | { |
546 | if ( (sb1->set_uuid0 == sb2->set_uuid0) && | 546 | return sb1->set_uuid0 == sb2->set_uuid0 && |
547 | (sb1->set_uuid1 == sb2->set_uuid1) && | 547 | sb1->set_uuid1 == sb2->set_uuid1 && |
548 | (sb1->set_uuid2 == sb2->set_uuid2) && | 548 | sb1->set_uuid2 == sb2->set_uuid2 && |
549 | (sb1->set_uuid3 == sb2->set_uuid3)) | 549 | sb1->set_uuid3 == sb2->set_uuid3; |
550 | |||
551 | return 1; | ||
552 | |||
553 | return 0; | ||
554 | } | 550 | } |
555 | 551 | ||
556 | |||
557 | static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2) | 552 | static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2) |
558 | { | 553 | { |
559 | int ret; | 554 | int ret; |
@@ -564,7 +559,7 @@ static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2) | |||
564 | 559 | ||
565 | if (!tmp1 || !tmp2) { | 560 | if (!tmp1 || !tmp2) { |
566 | ret = 0; | 561 | ret = 0; |
567 | printk(KERN_INFO "md.c: sb1 is not equal to sb2!\n"); | 562 | printk(KERN_INFO "md.c sb_equal(): failed to allocate memory!\n"); |
568 | goto abort; | 563 | goto abort; |
569 | } | 564 | } |
570 | 565 | ||
@@ -577,11 +572,7 @@ static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2) | |||
577 | tmp1->nr_disks = 0; | 572 | tmp1->nr_disks = 0; |
578 | tmp2->nr_disks = 0; | 573 | tmp2->nr_disks = 0; |
579 | 574 | ||
580 | if (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4)) | 575 | ret = (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4) == 0); |
581 | ret = 0; | ||
582 | else | ||
583 | ret = 1; | ||
584 | |||
585 | abort: | 576 | abort: |
586 | kfree(tmp1); | 577 | kfree(tmp1); |
587 | kfree(tmp2); | 578 | kfree(tmp2); |
@@ -658,11 +649,14 @@ static unsigned int calc_sb_csum(mdp_super_t * sb) | |||
658 | */ | 649 | */ |
659 | 650 | ||
660 | struct super_type { | 651 | struct super_type { |
661 | char *name; | 652 | char *name; |
662 | struct module *owner; | 653 | struct module *owner; |
663 | int (*load_super)(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version); | 654 | int (*load_super)(mdk_rdev_t *rdev, mdk_rdev_t *refdev, |
664 | int (*validate_super)(mddev_t *mddev, mdk_rdev_t *rdev); | 655 | int minor_version); |
665 | void (*sync_super)(mddev_t *mddev, mdk_rdev_t *rdev); | 656 | int (*validate_super)(mddev_t *mddev, mdk_rdev_t *rdev); |
657 | void (*sync_super)(mddev_t *mddev, mdk_rdev_t *rdev); | ||
658 | unsigned long long (*rdev_size_change)(mdk_rdev_t *rdev, | ||
659 | sector_t num_sectors); | ||
666 | }; | 660 | }; |
667 | 661 | ||
668 | /* | 662 | /* |
@@ -673,16 +667,14 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version | |||
673 | char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE]; | 667 | char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE]; |
674 | mdp_super_t *sb; | 668 | mdp_super_t *sb; |
675 | int ret; | 669 | int ret; |
676 | sector_t sb_offset; | ||
677 | 670 | ||
678 | /* | 671 | /* |
679 | * Calculate the position of the superblock, | 672 | * Calculate the position of the superblock (512byte sectors), |
680 | * it's at the end of the disk. | 673 | * it's at the end of the disk. |
681 | * | 674 | * |
682 | * It also happens to be a multiple of 4Kb. | 675 | * It also happens to be a multiple of 4Kb. |
683 | */ | 676 | */ |
684 | sb_offset = calc_dev_sboffset(rdev->bdev); | 677 | rdev->sb_start = calc_dev_sboffset(rdev->bdev); |
685 | rdev->sb_offset = sb_offset; | ||
686 | 678 | ||
687 | ret = read_disk_sb(rdev, MD_SB_BYTES); | 679 | ret = read_disk_sb(rdev, MD_SB_BYTES); |
688 | if (ret) return ret; | 680 | if (ret) return ret; |
@@ -759,7 +751,7 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version | |||
759 | else | 751 | else |
760 | ret = 0; | 752 | ret = 0; |
761 | } | 753 | } |
762 | rdev->size = calc_dev_size(rdev, sb->chunk_size); | 754 | rdev->size = calc_num_sectors(rdev, sb->chunk_size) / 2; |
763 | 755 | ||
764 | if (rdev->size < sb->size && sb->level > 1) | 756 | if (rdev->size < sb->size && sb->level > 1) |
765 | /* "this cannot possibly happen" ... */ | 757 | /* "this cannot possibly happen" ... */ |
@@ -1004,6 +996,26 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1004 | } | 996 | } |
1005 | 997 | ||
1006 | /* | 998 | /* |
999 | * rdev_size_change for 0.90.0 | ||
1000 | */ | ||
1001 | static unsigned long long | ||
1002 | super_90_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors) | ||
1003 | { | ||
1004 | if (num_sectors && num_sectors < rdev->mddev->size * 2) | ||
1005 | return 0; /* component must fit device */ | ||
1006 | if (rdev->mddev->bitmap_offset) | ||
1007 | return 0; /* can't move bitmap */ | ||
1008 | rdev->sb_start = calc_dev_sboffset(rdev->bdev); | ||
1009 | if (!num_sectors || num_sectors > rdev->sb_start) | ||
1010 | num_sectors = rdev->sb_start; | ||
1011 | md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size, | ||
1012 | rdev->sb_page); | ||
1013 | md_super_wait(rdev->mddev); | ||
1014 | return num_sectors / 2; /* kB for sysfs */ | ||
1015 | } | ||
1016 | |||
1017 | |||
1018 | /* | ||
1007 | * version 1 superblock | 1019 | * version 1 superblock |
1008 | */ | 1020 | */ |
1009 | 1021 | ||
@@ -1034,12 +1046,12 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) | |||
1034 | { | 1046 | { |
1035 | struct mdp_superblock_1 *sb; | 1047 | struct mdp_superblock_1 *sb; |
1036 | int ret; | 1048 | int ret; |
1037 | sector_t sb_offset; | 1049 | sector_t sb_start; |
1038 | char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE]; | 1050 | char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE]; |
1039 | int bmask; | 1051 | int bmask; |
1040 | 1052 | ||
1041 | /* | 1053 | /* |
1042 | * Calculate the position of the superblock. | 1054 | * Calculate the position of the superblock in 512byte sectors. |
1043 | * It is always aligned to a 4K boundary and | 1055 | * It is always aligned to a 4K boundary and |
1044 | * depending on minor_version, it can be: | 1056 | * depending on minor_version, it can be: |
1045 | * 0: At least 8K, but less than 12K, from end of device | 1057 | * 0: At least 8K, but less than 12K, from end of device |
@@ -1048,22 +1060,20 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) | |||
1048 | */ | 1060 | */ |
1049 | switch(minor_version) { | 1061 | switch(minor_version) { |
1050 | case 0: | 1062 | case 0: |
1051 | sb_offset = rdev->bdev->bd_inode->i_size >> 9; | 1063 | sb_start = rdev->bdev->bd_inode->i_size >> 9; |
1052 | sb_offset -= 8*2; | 1064 | sb_start -= 8*2; |
1053 | sb_offset &= ~(sector_t)(4*2-1); | 1065 | sb_start &= ~(sector_t)(4*2-1); |
1054 | /* convert from sectors to K */ | ||
1055 | sb_offset /= 2; | ||
1056 | break; | 1066 | break; |
1057 | case 1: | 1067 | case 1: |
1058 | sb_offset = 0; | 1068 | sb_start = 0; |
1059 | break; | 1069 | break; |
1060 | case 2: | 1070 | case 2: |
1061 | sb_offset = 4; | 1071 | sb_start = 8; |
1062 | break; | 1072 | break; |
1063 | default: | 1073 | default: |
1064 | return -EINVAL; | 1074 | return -EINVAL; |
1065 | } | 1075 | } |
1066 | rdev->sb_offset = sb_offset; | 1076 | rdev->sb_start = sb_start; |
1067 | 1077 | ||
1068 | /* superblock is rarely larger than 1K, but it can be larger, | 1078 | /* superblock is rarely larger than 1K, but it can be larger, |
1069 | * and it is safe to read 4k, so we do that | 1079 | * and it is safe to read 4k, so we do that |
@@ -1077,7 +1087,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) | |||
1077 | if (sb->magic != cpu_to_le32(MD_SB_MAGIC) || | 1087 | if (sb->magic != cpu_to_le32(MD_SB_MAGIC) || |
1078 | sb->major_version != cpu_to_le32(1) || | 1088 | sb->major_version != cpu_to_le32(1) || |
1079 | le32_to_cpu(sb->max_dev) > (4096-256)/2 || | 1089 | le32_to_cpu(sb->max_dev) > (4096-256)/2 || |
1080 | le64_to_cpu(sb->super_offset) != (rdev->sb_offset<<1) || | 1090 | le64_to_cpu(sb->super_offset) != rdev->sb_start || |
1081 | (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0) | 1091 | (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0) |
1082 | return -EINVAL; | 1092 | return -EINVAL; |
1083 | 1093 | ||
@@ -1113,7 +1123,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) | |||
1113 | rdev->sb_size = (rdev->sb_size | bmask) + 1; | 1123 | rdev->sb_size = (rdev->sb_size | bmask) + 1; |
1114 | 1124 | ||
1115 | if (minor_version | 1125 | if (minor_version |
1116 | && rdev->data_offset < sb_offset + (rdev->sb_size/512)) | 1126 | && rdev->data_offset < sb_start + (rdev->sb_size/512)) |
1117 | return -EINVAL; | 1127 | return -EINVAL; |
1118 | 1128 | ||
1119 | if (sb->level == cpu_to_le32(LEVEL_MULTIPATH)) | 1129 | if (sb->level == cpu_to_le32(LEVEL_MULTIPATH)) |
@@ -1149,7 +1159,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) | |||
1149 | if (minor_version) | 1159 | if (minor_version) |
1150 | rdev->size = ((rdev->bdev->bd_inode->i_size>>9) - le64_to_cpu(sb->data_offset)) / 2; | 1160 | rdev->size = ((rdev->bdev->bd_inode->i_size>>9) - le64_to_cpu(sb->data_offset)) / 2; |
1151 | else | 1161 | else |
1152 | rdev->size = rdev->sb_offset; | 1162 | rdev->size = rdev->sb_start / 2; |
1153 | if (rdev->size < le64_to_cpu(sb->data_size)/2) | 1163 | if (rdev->size < le64_to_cpu(sb->data_size)/2) |
1154 | return -EINVAL; | 1164 | return -EINVAL; |
1155 | rdev->size = le64_to_cpu(sb->data_size)/2; | 1165 | rdev->size = le64_to_cpu(sb->data_size)/2; |
@@ -1328,35 +1338,74 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1328 | sb->sb_csum = calc_sb_1_csum(sb); | 1338 | sb->sb_csum = calc_sb_1_csum(sb); |
1329 | } | 1339 | } |
1330 | 1340 | ||
1341 | static unsigned long long | ||
1342 | super_1_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors) | ||
1343 | { | ||
1344 | struct mdp_superblock_1 *sb; | ||
1345 | sector_t max_sectors; | ||
1346 | if (num_sectors && num_sectors < rdev->mddev->size * 2) | ||
1347 | return 0; /* component must fit device */ | ||
1348 | if (rdev->sb_start < rdev->data_offset) { | ||
1349 | /* minor versions 1 and 2; superblock before data */ | ||
1350 | max_sectors = rdev->bdev->bd_inode->i_size >> 9; | ||
1351 | max_sectors -= rdev->data_offset; | ||
1352 | if (!num_sectors || num_sectors > max_sectors) | ||
1353 | num_sectors = max_sectors; | ||
1354 | } else if (rdev->mddev->bitmap_offset) { | ||
1355 | /* minor version 0 with bitmap we can't move */ | ||
1356 | return 0; | ||
1357 | } else { | ||
1358 | /* minor version 0; superblock after data */ | ||
1359 | sector_t sb_start; | ||
1360 | sb_start = (rdev->bdev->bd_inode->i_size >> 9) - 8*2; | ||
1361 | sb_start &= ~(sector_t)(4*2 - 1); | ||
1362 | max_sectors = rdev->size * 2 + sb_start - rdev->sb_start; | ||
1363 | if (!num_sectors || num_sectors > max_sectors) | ||
1364 | num_sectors = max_sectors; | ||
1365 | rdev->sb_start = sb_start; | ||
1366 | } | ||
1367 | sb = (struct mdp_superblock_1 *) page_address(rdev->sb_page); | ||
1368 | sb->data_size = cpu_to_le64(num_sectors); | ||
1369 | sb->super_offset = rdev->sb_start; | ||
1370 | sb->sb_csum = calc_sb_1_csum(sb); | ||
1371 | md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size, | ||
1372 | rdev->sb_page); | ||
1373 | md_super_wait(rdev->mddev); | ||
1374 | return num_sectors / 2; /* kB for sysfs */ | ||
1375 | } | ||
1331 | 1376 | ||
1332 | static struct super_type super_types[] = { | 1377 | static struct super_type super_types[] = { |
1333 | [0] = { | 1378 | [0] = { |
1334 | .name = "0.90.0", | 1379 | .name = "0.90.0", |
1335 | .owner = THIS_MODULE, | 1380 | .owner = THIS_MODULE, |
1336 | .load_super = super_90_load, | 1381 | .load_super = super_90_load, |
1337 | .validate_super = super_90_validate, | 1382 | .validate_super = super_90_validate, |
1338 | .sync_super = super_90_sync, | 1383 | .sync_super = super_90_sync, |
1384 | .rdev_size_change = super_90_rdev_size_change, | ||
1339 | }, | 1385 | }, |
1340 | [1] = { | 1386 | [1] = { |
1341 | .name = "md-1", | 1387 | .name = "md-1", |
1342 | .owner = THIS_MODULE, | 1388 | .owner = THIS_MODULE, |
1343 | .load_super = super_1_load, | 1389 | .load_super = super_1_load, |
1344 | .validate_super = super_1_validate, | 1390 | .validate_super = super_1_validate, |
1345 | .sync_super = super_1_sync, | 1391 | .sync_super = super_1_sync, |
1392 | .rdev_size_change = super_1_rdev_size_change, | ||
1346 | }, | 1393 | }, |
1347 | }; | 1394 | }; |
1348 | 1395 | ||
1349 | static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2) | 1396 | static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2) |
1350 | { | 1397 | { |
1351 | struct list_head *tmp, *tmp2; | ||
1352 | mdk_rdev_t *rdev, *rdev2; | 1398 | mdk_rdev_t *rdev, *rdev2; |
1353 | 1399 | ||
1354 | rdev_for_each(rdev, tmp, mddev1) | 1400 | rcu_read_lock(); |
1355 | rdev_for_each(rdev2, tmp2, mddev2) | 1401 | rdev_for_each_rcu(rdev, mddev1) |
1402 | rdev_for_each_rcu(rdev2, mddev2) | ||
1356 | if (rdev->bdev->bd_contains == | 1403 | if (rdev->bdev->bd_contains == |
1357 | rdev2->bdev->bd_contains) | 1404 | rdev2->bdev->bd_contains) { |
1405 | rcu_read_unlock(); | ||
1358 | return 1; | 1406 | return 1; |
1359 | 1407 | } | |
1408 | rcu_read_unlock(); | ||
1360 | return 0; | 1409 | return 0; |
1361 | } | 1410 | } |
1362 | 1411 | ||
@@ -1423,7 +1472,7 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) | |||
1423 | kobject_del(&rdev->kobj); | 1472 | kobject_del(&rdev->kobj); |
1424 | goto fail; | 1473 | goto fail; |
1425 | } | 1474 | } |
1426 | list_add(&rdev->same_set, &mddev->disks); | 1475 | list_add_rcu(&rdev->same_set, &mddev->disks); |
1427 | bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk); | 1476 | bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk); |
1428 | return 0; | 1477 | return 0; |
1429 | 1478 | ||
@@ -1448,14 +1497,16 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev) | |||
1448 | return; | 1497 | return; |
1449 | } | 1498 | } |
1450 | bd_release_from_disk(rdev->bdev, rdev->mddev->gendisk); | 1499 | bd_release_from_disk(rdev->bdev, rdev->mddev->gendisk); |
1451 | list_del_init(&rdev->same_set); | 1500 | list_del_rcu(&rdev->same_set); |
1452 | printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b)); | 1501 | printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b)); |
1453 | rdev->mddev = NULL; | 1502 | rdev->mddev = NULL; |
1454 | sysfs_remove_link(&rdev->kobj, "block"); | 1503 | sysfs_remove_link(&rdev->kobj, "block"); |
1455 | 1504 | ||
1456 | /* We need to delay this, otherwise we can deadlock when | 1505 | /* We need to delay this, otherwise we can deadlock when |
1457 | * writing to 'remove' to "dev/state" | 1506 | * writing to 'remove' to "dev/state". We also need |
1507 | * to delay it due to rcu usage. | ||
1458 | */ | 1508 | */ |
1509 | synchronize_rcu(); | ||
1459 | INIT_WORK(&rdev->del_work, md_delayed_delete); | 1510 | INIT_WORK(&rdev->del_work, md_delayed_delete); |
1460 | kobject_get(&rdev->kobj); | 1511 | kobject_get(&rdev->kobj); |
1461 | schedule_work(&rdev->del_work); | 1512 | schedule_work(&rdev->del_work); |
@@ -1511,7 +1562,6 @@ static void export_rdev(mdk_rdev_t * rdev) | |||
1511 | if (rdev->mddev) | 1562 | if (rdev->mddev) |
1512 | MD_BUG(); | 1563 | MD_BUG(); |
1513 | free_disk_sb(rdev); | 1564 | free_disk_sb(rdev); |
1514 | list_del_init(&rdev->same_set); | ||
1515 | #ifndef MODULE | 1565 | #ifndef MODULE |
1516 | if (test_bit(AutoDetected, &rdev->flags)) | 1566 | if (test_bit(AutoDetected, &rdev->flags)) |
1517 | md_autodetect_dev(rdev->bdev->bd_dev); | 1567 | md_autodetect_dev(rdev->bdev->bd_dev); |
@@ -1758,11 +1808,11 @@ repeat: | |||
1758 | dprintk("%s ", bdevname(rdev->bdev,b)); | 1808 | dprintk("%s ", bdevname(rdev->bdev,b)); |
1759 | if (!test_bit(Faulty, &rdev->flags)) { | 1809 | if (!test_bit(Faulty, &rdev->flags)) { |
1760 | md_super_write(mddev,rdev, | 1810 | md_super_write(mddev,rdev, |
1761 | rdev->sb_offset<<1, rdev->sb_size, | 1811 | rdev->sb_start, rdev->sb_size, |
1762 | rdev->sb_page); | 1812 | rdev->sb_page); |
1763 | dprintk(KERN_INFO "(write) %s's sb offset: %llu\n", | 1813 | dprintk(KERN_INFO "(write) %s's sb offset: %llu\n", |
1764 | bdevname(rdev->bdev,b), | 1814 | bdevname(rdev->bdev,b), |
1765 | (unsigned long long)rdev->sb_offset); | 1815 | (unsigned long long)rdev->sb_start); |
1766 | rdev->sb_events = mddev->events; | 1816 | rdev->sb_events = mddev->events; |
1767 | 1817 | ||
1768 | } else | 1818 | } else |
@@ -1787,7 +1837,7 @@ repeat: | |||
1787 | 1837 | ||
1788 | } | 1838 | } |
1789 | 1839 | ||
1790 | /* words written to sysfs files may, or my not, be \n terminated. | 1840 | /* words written to sysfs files may, or may not, be \n terminated. |
1791 | * We want to accept either case. For this we use cmd_match. | 1841 | * We want to accept either case. For this we use cmd_match. |
1792 | */ | 1842 | */ |
1793 | static int cmd_match(const char *cmd, const char *str) | 1843 | static int cmd_match(const char *cmd, const char *str) |
@@ -1886,6 +1936,8 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len) | |||
1886 | 1936 | ||
1887 | err = 0; | 1937 | err = 0; |
1888 | } | 1938 | } |
1939 | if (!err) | ||
1940 | sysfs_notify(&rdev->kobj, NULL, "state"); | ||
1889 | return err ? err : len; | 1941 | return err ? err : len; |
1890 | } | 1942 | } |
1891 | static struct rdev_sysfs_entry rdev_state = | 1943 | static struct rdev_sysfs_entry rdev_state = |
@@ -1931,7 +1983,7 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len) | |||
1931 | slot = -1; | 1983 | slot = -1; |
1932 | else if (e==buf || (*e && *e!= '\n')) | 1984 | else if (e==buf || (*e && *e!= '\n')) |
1933 | return -EINVAL; | 1985 | return -EINVAL; |
1934 | if (rdev->mddev->pers) { | 1986 | if (rdev->mddev->pers && slot == -1) { |
1935 | /* Setting 'slot' on an active array requires also | 1987 | /* Setting 'slot' on an active array requires also |
1936 | * updating the 'rd%d' link, and communicating | 1988 | * updating the 'rd%d' link, and communicating |
1937 | * with the personality with ->hot_*_disk. | 1989 | * with the personality with ->hot_*_disk. |
@@ -1939,8 +1991,6 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len) | |||
1939 | * failed/spare devices. This normally happens automatically, | 1991 | * failed/spare devices. This normally happens automatically, |
1940 | * but not when the metadata is externally managed. | 1992 | * but not when the metadata is externally managed. |
1941 | */ | 1993 | */ |
1942 | if (slot != -1) | ||
1943 | return -EBUSY; | ||
1944 | if (rdev->raid_disk == -1) | 1994 | if (rdev->raid_disk == -1) |
1945 | return -EEXIST; | 1995 | return -EEXIST; |
1946 | /* personality does all needed checks */ | 1996 | /* personality does all needed checks */ |
@@ -1954,6 +2004,43 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len) | |||
1954 | sysfs_remove_link(&rdev->mddev->kobj, nm); | 2004 | sysfs_remove_link(&rdev->mddev->kobj, nm); |
1955 | set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery); | 2005 | set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery); |
1956 | md_wakeup_thread(rdev->mddev->thread); | 2006 | md_wakeup_thread(rdev->mddev->thread); |
2007 | } else if (rdev->mddev->pers) { | ||
2008 | mdk_rdev_t *rdev2; | ||
2009 | struct list_head *tmp; | ||
2010 | /* Activating a spare .. or possibly reactivating | ||
2011 | * if we ever get bitmaps working here. | ||
2012 | */ | ||
2013 | |||
2014 | if (rdev->raid_disk != -1) | ||
2015 | return -EBUSY; | ||
2016 | |||
2017 | if (rdev->mddev->pers->hot_add_disk == NULL) | ||
2018 | return -EINVAL; | ||
2019 | |||
2020 | rdev_for_each(rdev2, tmp, rdev->mddev) | ||
2021 | if (rdev2->raid_disk == slot) | ||
2022 | return -EEXIST; | ||
2023 | |||
2024 | rdev->raid_disk = slot; | ||
2025 | if (test_bit(In_sync, &rdev->flags)) | ||
2026 | rdev->saved_raid_disk = slot; | ||
2027 | else | ||
2028 | rdev->saved_raid_disk = -1; | ||
2029 | err = rdev->mddev->pers-> | ||
2030 | hot_add_disk(rdev->mddev, rdev); | ||
2031 | if (err) { | ||
2032 | rdev->raid_disk = -1; | ||
2033 | return err; | ||
2034 | } else | ||
2035 | sysfs_notify(&rdev->kobj, NULL, "state"); | ||
2036 | sprintf(nm, "rd%d", rdev->raid_disk); | ||
2037 | if (sysfs_create_link(&rdev->mddev->kobj, &rdev->kobj, nm)) | ||
2038 | printk(KERN_WARNING | ||
2039 | "md: cannot register " | ||
2040 | "%s for %s\n", | ||
2041 | nm, mdname(rdev->mddev)); | ||
2042 | |||
2043 | /* don't wakeup anyone, leave that to userspace. */ | ||
1957 | } else { | 2044 | } else { |
1958 | if (slot >= rdev->mddev->raid_disks) | 2045 | if (slot >= rdev->mddev->raid_disks) |
1959 | return -ENOSPC; | 2046 | return -ENOSPC; |
@@ -1962,6 +2049,7 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len) | |||
1962 | clear_bit(Faulty, &rdev->flags); | 2049 | clear_bit(Faulty, &rdev->flags); |
1963 | clear_bit(WriteMostly, &rdev->flags); | 2050 | clear_bit(WriteMostly, &rdev->flags); |
1964 | set_bit(In_sync, &rdev->flags); | 2051 | set_bit(In_sync, &rdev->flags); |
2052 | sysfs_notify(&rdev->kobj, NULL, "state"); | ||
1965 | } | 2053 | } |
1966 | return len; | 2054 | return len; |
1967 | } | 2055 | } |
@@ -1983,7 +2071,7 @@ offset_store(mdk_rdev_t *rdev, const char *buf, size_t len) | |||
1983 | unsigned long long offset = simple_strtoull(buf, &e, 10); | 2071 | unsigned long long offset = simple_strtoull(buf, &e, 10); |
1984 | if (e==buf || (*e && *e != '\n')) | 2072 | if (e==buf || (*e && *e != '\n')) |
1985 | return -EINVAL; | 2073 | return -EINVAL; |
1986 | if (rdev->mddev->pers) | 2074 | if (rdev->mddev->pers && rdev->raid_disk >= 0) |
1987 | return -EBUSY; | 2075 | return -EBUSY; |
1988 | if (rdev->size && rdev->mddev->external) | 2076 | if (rdev->size && rdev->mddev->external) |
1989 | /* Must set offset before size, so overlap checks | 2077 | /* Must set offset before size, so overlap checks |
@@ -2015,17 +2103,30 @@ static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2) | |||
2015 | static ssize_t | 2103 | static ssize_t |
2016 | rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len) | 2104 | rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len) |
2017 | { | 2105 | { |
2018 | char *e; | 2106 | unsigned long long size; |
2019 | unsigned long long size = simple_strtoull(buf, &e, 10); | ||
2020 | unsigned long long oldsize = rdev->size; | 2107 | unsigned long long oldsize = rdev->size; |
2021 | mddev_t *my_mddev = rdev->mddev; | 2108 | mddev_t *my_mddev = rdev->mddev; |
2022 | 2109 | ||
2023 | if (e==buf || (*e && *e != '\n')) | 2110 | if (strict_strtoull(buf, 10, &size) < 0) |
2024 | return -EINVAL; | 2111 | return -EINVAL; |
2025 | if (my_mddev->pers) | 2112 | if (size < my_mddev->size) |
2026 | return -EBUSY; | 2113 | return -EINVAL; |
2114 | if (my_mddev->pers && rdev->raid_disk >= 0) { | ||
2115 | if (my_mddev->persistent) { | ||
2116 | size = super_types[my_mddev->major_version]. | ||
2117 | rdev_size_change(rdev, size * 2); | ||
2118 | if (!size) | ||
2119 | return -EBUSY; | ||
2120 | } else if (!size) { | ||
2121 | size = (rdev->bdev->bd_inode->i_size >> 10); | ||
2122 | size -= rdev->data_offset/2; | ||
2123 | } | ||
2124 | if (size < my_mddev->size) | ||
2125 | return -EINVAL; /* component must fit device */ | ||
2126 | } | ||
2127 | |||
2027 | rdev->size = size; | 2128 | rdev->size = size; |
2028 | if (size > oldsize && rdev->mddev->external) { | 2129 | if (size > oldsize && my_mddev->external) { |
2029 | /* need to check that all other rdevs with the same ->bdev | 2130 | /* need to check that all other rdevs with the same ->bdev |
2030 | * do not overlap. We need to unlock the mddev to avoid | 2131 | * do not overlap. We need to unlock the mddev to avoid |
2031 | * a deadlock. We have already changed rdev->size, and if | 2132 | * a deadlock. We have already changed rdev->size, and if |
@@ -2044,8 +2145,9 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len) | |||
2044 | if (test_bit(AllReserved, &rdev2->flags) || | 2145 | if (test_bit(AllReserved, &rdev2->flags) || |
2045 | (rdev->bdev == rdev2->bdev && | 2146 | (rdev->bdev == rdev2->bdev && |
2046 | rdev != rdev2 && | 2147 | rdev != rdev2 && |
2047 | overlaps(rdev->data_offset, rdev->size, | 2148 | overlaps(rdev->data_offset, rdev->size * 2, |
2048 | rdev2->data_offset, rdev2->size))) { | 2149 | rdev2->data_offset, |
2150 | rdev2->size * 2))) { | ||
2049 | overlap = 1; | 2151 | overlap = 1; |
2050 | break; | 2152 | break; |
2051 | } | 2153 | } |
@@ -2067,8 +2169,6 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len) | |||
2067 | return -EBUSY; | 2169 | return -EBUSY; |
2068 | } | 2170 | } |
2069 | } | 2171 | } |
2070 | if (size < my_mddev->size || my_mddev->size == 0) | ||
2071 | my_mddev->size = size; | ||
2072 | return len; | 2172 | return len; |
2073 | } | 2173 | } |
2074 | 2174 | ||
@@ -2512,7 +2612,7 @@ __ATTR(resync_start, S_IRUGO|S_IWUSR, resync_start_show, resync_start_store); | |||
2512 | * When written, doesn't tear down array, but just stops it | 2612 | * When written, doesn't tear down array, but just stops it |
2513 | * suspended (not supported yet) | 2613 | * suspended (not supported yet) |
2514 | * All IO requests will block. The array can be reconfigured. | 2614 | * All IO requests will block. The array can be reconfigured. |
2515 | * Writing this, if accepted, will block until array is quiessent | 2615 | * Writing this, if accepted, will block until array is quiescent |
2516 | * readonly | 2616 | * readonly |
2517 | * no resync can happen. no superblocks get written. | 2617 | * no resync can happen. no superblocks get written. |
2518 | * write requests fail | 2618 | * write requests fail |
@@ -2585,7 +2685,7 @@ array_state_show(mddev_t *mddev, char *page) | |||
2585 | return sprintf(page, "%s\n", array_states[st]); | 2685 | return sprintf(page, "%s\n", array_states[st]); |
2586 | } | 2686 | } |
2587 | 2687 | ||
2588 | static int do_md_stop(mddev_t * mddev, int ro); | 2688 | static int do_md_stop(mddev_t * mddev, int ro, int is_open); |
2589 | static int do_md_run(mddev_t * mddev); | 2689 | static int do_md_run(mddev_t * mddev); |
2590 | static int restart_array(mddev_t *mddev); | 2690 | static int restart_array(mddev_t *mddev); |
2591 | 2691 | ||
@@ -2599,16 +2699,16 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) | |||
2599 | break; | 2699 | break; |
2600 | case clear: | 2700 | case clear: |
2601 | /* stopping an active array */ | 2701 | /* stopping an active array */ |
2602 | if (atomic_read(&mddev->active) > 1) | 2702 | if (atomic_read(&mddev->openers) > 0) |
2603 | return -EBUSY; | 2703 | return -EBUSY; |
2604 | err = do_md_stop(mddev, 0); | 2704 | err = do_md_stop(mddev, 0, 0); |
2605 | break; | 2705 | break; |
2606 | case inactive: | 2706 | case inactive: |
2607 | /* stopping an active array */ | 2707 | /* stopping an active array */ |
2608 | if (mddev->pers) { | 2708 | if (mddev->pers) { |
2609 | if (atomic_read(&mddev->active) > 1) | 2709 | if (atomic_read(&mddev->openers) > 0) |
2610 | return -EBUSY; | 2710 | return -EBUSY; |
2611 | err = do_md_stop(mddev, 2); | 2711 | err = do_md_stop(mddev, 2, 0); |
2612 | } else | 2712 | } else |
2613 | err = 0; /* already inactive */ | 2713 | err = 0; /* already inactive */ |
2614 | break; | 2714 | break; |
@@ -2616,7 +2716,7 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) | |||
2616 | break; /* not supported yet */ | 2716 | break; /* not supported yet */ |
2617 | case readonly: | 2717 | case readonly: |
2618 | if (mddev->pers) | 2718 | if (mddev->pers) |
2619 | err = do_md_stop(mddev, 1); | 2719 | err = do_md_stop(mddev, 1, 0); |
2620 | else { | 2720 | else { |
2621 | mddev->ro = 1; | 2721 | mddev->ro = 1; |
2622 | set_disk_ro(mddev->gendisk, 1); | 2722 | set_disk_ro(mddev->gendisk, 1); |
@@ -2626,7 +2726,7 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) | |||
2626 | case read_auto: | 2726 | case read_auto: |
2627 | if (mddev->pers) { | 2727 | if (mddev->pers) { |
2628 | if (mddev->ro != 1) | 2728 | if (mddev->ro != 1) |
2629 | err = do_md_stop(mddev, 1); | 2729 | err = do_md_stop(mddev, 1, 0); |
2630 | else | 2730 | else |
2631 | err = restart_array(mddev); | 2731 | err = restart_array(mddev); |
2632 | if (err == 0) { | 2732 | if (err == 0) { |
@@ -2681,8 +2781,10 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) | |||
2681 | } | 2781 | } |
2682 | if (err) | 2782 | if (err) |
2683 | return err; | 2783 | return err; |
2684 | else | 2784 | else { |
2785 | sysfs_notify(&mddev->kobj, NULL, "array_state"); | ||
2685 | return len; | 2786 | return len; |
2787 | } | ||
2686 | } | 2788 | } |
2687 | static struct md_sysfs_entry md_array_state = | 2789 | static struct md_sysfs_entry md_array_state = |
2688 | __ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store); | 2790 | __ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store); |
@@ -2785,7 +2887,7 @@ size_show(mddev_t *mddev, char *page) | |||
2785 | return sprintf(page, "%llu\n", (unsigned long long)mddev->size); | 2887 | return sprintf(page, "%llu\n", (unsigned long long)mddev->size); |
2786 | } | 2888 | } |
2787 | 2889 | ||
2788 | static int update_size(mddev_t *mddev, unsigned long size); | 2890 | static int update_size(mddev_t *mddev, sector_t num_sectors); |
2789 | 2891 | ||
2790 | static ssize_t | 2892 | static ssize_t |
2791 | size_store(mddev_t *mddev, const char *buf, size_t len) | 2893 | size_store(mddev_t *mddev, const char *buf, size_t len) |
@@ -2802,7 +2904,7 @@ size_store(mddev_t *mddev, const char *buf, size_t len) | |||
2802 | return -EINVAL; | 2904 | return -EINVAL; |
2803 | 2905 | ||
2804 | if (mddev->pers) { | 2906 | if (mddev->pers) { |
2805 | err = update_size(mddev, size); | 2907 | err = update_size(mddev, size * 2); |
2806 | md_update_sb(mddev, 1); | 2908 | md_update_sb(mddev, 1); |
2807 | } else { | 2909 | } else { |
2808 | if (mddev->size == 0 || | 2910 | if (mddev->size == 0 || |
@@ -2899,7 +3001,7 @@ action_show(mddev_t *mddev, char *page) | |||
2899 | type = "check"; | 3001 | type = "check"; |
2900 | else | 3002 | else |
2901 | type = "repair"; | 3003 | type = "repair"; |
2902 | } else | 3004 | } else if (test_bit(MD_RECOVERY_RECOVER, &mddev->recovery)) |
2903 | type = "recover"; | 3005 | type = "recover"; |
2904 | } | 3006 | } |
2905 | return sprintf(page, "%s\n", type); | 3007 | return sprintf(page, "%s\n", type); |
@@ -2921,15 +3023,19 @@ action_store(mddev_t *mddev, const char *page, size_t len) | |||
2921 | } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || | 3023 | } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || |
2922 | test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) | 3024 | test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) |
2923 | return -EBUSY; | 3025 | return -EBUSY; |
2924 | else if (cmd_match(page, "resync") || cmd_match(page, "recover")) | 3026 | else if (cmd_match(page, "resync")) |
3027 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | ||
3028 | else if (cmd_match(page, "recover")) { | ||
3029 | set_bit(MD_RECOVERY_RECOVER, &mddev->recovery); | ||
2925 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 3030 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
2926 | else if (cmd_match(page, "reshape")) { | 3031 | } else if (cmd_match(page, "reshape")) { |
2927 | int err; | 3032 | int err; |
2928 | if (mddev->pers->start_reshape == NULL) | 3033 | if (mddev->pers->start_reshape == NULL) |
2929 | return -EINVAL; | 3034 | return -EINVAL; |
2930 | err = mddev->pers->start_reshape(mddev); | 3035 | err = mddev->pers->start_reshape(mddev); |
2931 | if (err) | 3036 | if (err) |
2932 | return err; | 3037 | return err; |
3038 | sysfs_notify(&mddev->kobj, NULL, "degraded"); | ||
2933 | } else { | 3039 | } else { |
2934 | if (cmd_match(page, "check")) | 3040 | if (cmd_match(page, "check")) |
2935 | set_bit(MD_RECOVERY_CHECK, &mddev->recovery); | 3041 | set_bit(MD_RECOVERY_CHECK, &mddev->recovery); |
@@ -2940,6 +3046,7 @@ action_store(mddev_t *mddev, const char *page, size_t len) | |||
2940 | } | 3046 | } |
2941 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 3047 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
2942 | md_wakeup_thread(mddev->thread); | 3048 | md_wakeup_thread(mddev->thread); |
3049 | sysfs_notify(&mddev->kobj, NULL, "sync_action"); | ||
2943 | return len; | 3050 | return len; |
2944 | } | 3051 | } |
2945 | 3052 | ||
@@ -3049,11 +3156,11 @@ static ssize_t | |||
3049 | sync_speed_show(mddev_t *mddev, char *page) | 3156 | sync_speed_show(mddev_t *mddev, char *page) |
3050 | { | 3157 | { |
3051 | unsigned long resync, dt, db; | 3158 | unsigned long resync, dt, db; |
3052 | resync = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active)); | 3159 | resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active); |
3053 | dt = ((jiffies - mddev->resync_mark) / HZ); | 3160 | dt = (jiffies - mddev->resync_mark) / HZ; |
3054 | if (!dt) dt++; | 3161 | if (!dt) dt++; |
3055 | db = resync - (mddev->resync_mark_cnt); | 3162 | db = resync - mddev->resync_mark_cnt; |
3056 | return sprintf(page, "%ld\n", db/dt/2); /* K/sec */ | 3163 | return sprintf(page, "%lu\n", db/dt/2); /* K/sec */ |
3057 | } | 3164 | } |
3058 | 3165 | ||
3059 | static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed); | 3166 | static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed); |
@@ -3075,6 +3182,36 @@ sync_completed_show(mddev_t *mddev, char *page) | |||
3075 | static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed); | 3182 | static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed); |
3076 | 3183 | ||
3077 | static ssize_t | 3184 | static ssize_t |
3185 | min_sync_show(mddev_t *mddev, char *page) | ||
3186 | { | ||
3187 | return sprintf(page, "%llu\n", | ||
3188 | (unsigned long long)mddev->resync_min); | ||
3189 | } | ||
3190 | static ssize_t | ||
3191 | min_sync_store(mddev_t *mddev, const char *buf, size_t len) | ||
3192 | { | ||
3193 | unsigned long long min; | ||
3194 | if (strict_strtoull(buf, 10, &min)) | ||
3195 | return -EINVAL; | ||
3196 | if (min > mddev->resync_max) | ||
3197 | return -EINVAL; | ||
3198 | if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) | ||
3199 | return -EBUSY; | ||
3200 | |||
3201 | /* Must be a multiple of chunk_size */ | ||
3202 | if (mddev->chunk_size) { | ||
3203 | if (min & (sector_t)((mddev->chunk_size>>9)-1)) | ||
3204 | return -EINVAL; | ||
3205 | } | ||
3206 | mddev->resync_min = min; | ||
3207 | |||
3208 | return len; | ||
3209 | } | ||
3210 | |||
3211 | static struct md_sysfs_entry md_min_sync = | ||
3212 | __ATTR(sync_min, S_IRUGO|S_IWUSR, min_sync_show, min_sync_store); | ||
3213 | |||
3214 | static ssize_t | ||
3078 | max_sync_show(mddev_t *mddev, char *page) | 3215 | max_sync_show(mddev_t *mddev, char *page) |
3079 | { | 3216 | { |
3080 | if (mddev->resync_max == MaxSector) | 3217 | if (mddev->resync_max == MaxSector) |
@@ -3089,9 +3226,10 @@ max_sync_store(mddev_t *mddev, const char *buf, size_t len) | |||
3089 | if (strncmp(buf, "max", 3) == 0) | 3226 | if (strncmp(buf, "max", 3) == 0) |
3090 | mddev->resync_max = MaxSector; | 3227 | mddev->resync_max = MaxSector; |
3091 | else { | 3228 | else { |
3092 | char *ep; | 3229 | unsigned long long max; |
3093 | unsigned long long max = simple_strtoull(buf, &ep, 10); | 3230 | if (strict_strtoull(buf, 10, &max)) |
3094 | if (ep == buf || (*ep != 0 && *ep != '\n')) | 3231 | return -EINVAL; |
3232 | if (max < mddev->resync_min) | ||
3095 | return -EINVAL; | 3233 | return -EINVAL; |
3096 | if (max < mddev->resync_max && | 3234 | if (max < mddev->resync_max && |
3097 | test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) | 3235 | test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) |
@@ -3222,6 +3360,7 @@ static struct attribute *md_redundancy_attrs[] = { | |||
3222 | &md_sync_speed.attr, | 3360 | &md_sync_speed.attr, |
3223 | &md_sync_force_parallel.attr, | 3361 | &md_sync_force_parallel.attr, |
3224 | &md_sync_completed.attr, | 3362 | &md_sync_completed.attr, |
3363 | &md_min_sync.attr, | ||
3225 | &md_max_sync.attr, | 3364 | &md_max_sync.attr, |
3226 | &md_suspend_lo.attr, | 3365 | &md_suspend_lo.attr, |
3227 | &md_suspend_hi.attr, | 3366 | &md_suspend_hi.attr, |
@@ -3326,9 +3465,9 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data) | |||
3326 | disk->queue = mddev->queue; | 3465 | disk->queue = mddev->queue; |
3327 | add_disk(disk); | 3466 | add_disk(disk); |
3328 | mddev->gendisk = disk; | 3467 | mddev->gendisk = disk; |
3329 | mutex_unlock(&disks_mutex); | ||
3330 | error = kobject_init_and_add(&mddev->kobj, &md_ktype, &disk->dev.kobj, | 3468 | error = kobject_init_and_add(&mddev->kobj, &md_ktype, &disk->dev.kobj, |
3331 | "%s", "md"); | 3469 | "%s", "md"); |
3470 | mutex_unlock(&disks_mutex); | ||
3332 | if (error) | 3471 | if (error) |
3333 | printk(KERN_WARNING "md: cannot register %s/md - name in use\n", | 3472 | printk(KERN_WARNING "md: cannot register %s/md - name in use\n", |
3334 | disk->disk_name); | 3473 | disk->disk_name); |
@@ -3341,7 +3480,11 @@ static void md_safemode_timeout(unsigned long data) | |||
3341 | { | 3480 | { |
3342 | mddev_t *mddev = (mddev_t *) data; | 3481 | mddev_t *mddev = (mddev_t *) data; |
3343 | 3482 | ||
3344 | mddev->safemode = 1; | 3483 | if (!atomic_read(&mddev->writes_pending)) { |
3484 | mddev->safemode = 1; | ||
3485 | if (mddev->external) | ||
3486 | sysfs_notify(&mddev->kobj, NULL, "array_state"); | ||
3487 | } | ||
3345 | md_wakeup_thread(mddev->thread); | 3488 | md_wakeup_thread(mddev->thread); |
3346 | } | 3489 | } |
3347 | 3490 | ||
@@ -3432,22 +3575,23 @@ static int do_md_run(mddev_t * mddev) | |||
3432 | * We don't want the data to overlap the metadata, | 3575 | * We don't want the data to overlap the metadata, |
3433 | * Internal Bitmap issues has handled elsewhere. | 3576 | * Internal Bitmap issues has handled elsewhere. |
3434 | */ | 3577 | */ |
3435 | if (rdev->data_offset < rdev->sb_offset) { | 3578 | if (rdev->data_offset < rdev->sb_start) { |
3436 | if (mddev->size && | 3579 | if (mddev->size && |
3437 | rdev->data_offset + mddev->size*2 | 3580 | rdev->data_offset + mddev->size*2 |
3438 | > rdev->sb_offset*2) { | 3581 | > rdev->sb_start) { |
3439 | printk("md: %s: data overlaps metadata\n", | 3582 | printk("md: %s: data overlaps metadata\n", |
3440 | mdname(mddev)); | 3583 | mdname(mddev)); |
3441 | return -EINVAL; | 3584 | return -EINVAL; |
3442 | } | 3585 | } |
3443 | } else { | 3586 | } else { |
3444 | if (rdev->sb_offset*2 + rdev->sb_size/512 | 3587 | if (rdev->sb_start + rdev->sb_size/512 |
3445 | > rdev->data_offset) { | 3588 | > rdev->data_offset) { |
3446 | printk("md: %s: metadata overlaps data\n", | 3589 | printk("md: %s: metadata overlaps data\n", |
3447 | mdname(mddev)); | 3590 | mdname(mddev)); |
3448 | return -EINVAL; | 3591 | return -EINVAL; |
3449 | } | 3592 | } |
3450 | } | 3593 | } |
3594 | sysfs_notify(&rdev->kobj, NULL, "state"); | ||
3451 | } | 3595 | } |
3452 | 3596 | ||
3453 | md_probe(mddev->unit, NULL, NULL); | 3597 | md_probe(mddev->unit, NULL, NULL); |
@@ -3519,7 +3663,9 @@ static int do_md_run(mddev_t * mddev) | |||
3519 | mddev->ro = 2; /* read-only, but switch on first write */ | 3663 | mddev->ro = 2; /* read-only, but switch on first write */ |
3520 | 3664 | ||
3521 | err = mddev->pers->run(mddev); | 3665 | err = mddev->pers->run(mddev); |
3522 | if (!err && mddev->pers->sync_request) { | 3666 | if (err) |
3667 | printk(KERN_ERR "md: pers->run() failed ...\n"); | ||
3668 | else if (mddev->pers->sync_request) { | ||
3523 | err = bitmap_create(mddev); | 3669 | err = bitmap_create(mddev); |
3524 | if (err) { | 3670 | if (err) { |
3525 | printk(KERN_ERR "%s: failed to create bitmap (%d)\n", | 3671 | printk(KERN_ERR "%s: failed to create bitmap (%d)\n", |
@@ -3528,7 +3674,6 @@ static int do_md_run(mddev_t * mddev) | |||
3528 | } | 3674 | } |
3529 | } | 3675 | } |
3530 | if (err) { | 3676 | if (err) { |
3531 | printk(KERN_ERR "md: pers->run() failed ...\n"); | ||
3532 | module_put(mddev->pers->owner); | 3677 | module_put(mddev->pers->owner); |
3533 | mddev->pers = NULL; | 3678 | mddev->pers = NULL; |
3534 | bitmap_destroy(mddev); | 3679 | bitmap_destroy(mddev); |
@@ -3563,7 +3708,7 @@ static int do_md_run(mddev_t * mddev) | |||
3563 | if (mddev->flags) | 3708 | if (mddev->flags) |
3564 | md_update_sb(mddev, 0); | 3709 | md_update_sb(mddev, 0); |
3565 | 3710 | ||
3566 | set_capacity(disk, mddev->array_size<<1); | 3711 | set_capacity(disk, mddev->array_sectors); |
3567 | 3712 | ||
3568 | /* If we call blk_queue_make_request here, it will | 3713 | /* If we call blk_queue_make_request here, it will |
3569 | * re-initialise max_sectors etc which may have been | 3714 | * re-initialise max_sectors etc which may have been |
@@ -3608,6 +3753,9 @@ static int do_md_run(mddev_t * mddev) | |||
3608 | 3753 | ||
3609 | mddev->changed = 1; | 3754 | mddev->changed = 1; |
3610 | md_new_event(mddev); | 3755 | md_new_event(mddev); |
3756 | sysfs_notify(&mddev->kobj, NULL, "array_state"); | ||
3757 | sysfs_notify(&mddev->kobj, NULL, "sync_action"); | ||
3758 | sysfs_notify(&mddev->kobj, NULL, "degraded"); | ||
3611 | kobject_uevent(&mddev->gendisk->dev.kobj, KOBJ_CHANGE); | 3759 | kobject_uevent(&mddev->gendisk->dev.kobj, KOBJ_CHANGE); |
3612 | return 0; | 3760 | return 0; |
3613 | } | 3761 | } |
@@ -3615,38 +3763,25 @@ static int do_md_run(mddev_t * mddev) | |||
3615 | static int restart_array(mddev_t *mddev) | 3763 | static int restart_array(mddev_t *mddev) |
3616 | { | 3764 | { |
3617 | struct gendisk *disk = mddev->gendisk; | 3765 | struct gendisk *disk = mddev->gendisk; |
3618 | int err; | ||
3619 | 3766 | ||
3620 | /* | 3767 | /* Complain if it has no devices */ |
3621 | * Complain if it has no devices | ||
3622 | */ | ||
3623 | err = -ENXIO; | ||
3624 | if (list_empty(&mddev->disks)) | 3768 | if (list_empty(&mddev->disks)) |
3625 | goto out; | 3769 | return -ENXIO; |
3626 | 3770 | if (!mddev->pers) | |
3627 | if (mddev->pers) { | 3771 | return -EINVAL; |
3628 | err = -EBUSY; | 3772 | if (!mddev->ro) |
3629 | if (!mddev->ro) | 3773 | return -EBUSY; |
3630 | goto out; | 3774 | mddev->safemode = 0; |
3631 | 3775 | mddev->ro = 0; | |
3632 | mddev->safemode = 0; | 3776 | set_disk_ro(disk, 0); |
3633 | mddev->ro = 0; | 3777 | printk(KERN_INFO "md: %s switched to read-write mode.\n", |
3634 | set_disk_ro(disk, 0); | 3778 | mdname(mddev)); |
3635 | 3779 | /* Kick recovery or resync if necessary */ | |
3636 | printk(KERN_INFO "md: %s switched to read-write mode.\n", | 3780 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
3637 | mdname(mddev)); | 3781 | md_wakeup_thread(mddev->thread); |
3638 | /* | 3782 | md_wakeup_thread(mddev->sync_thread); |
3639 | * Kick recovery or resync if necessary | 3783 | sysfs_notify(&mddev->kobj, NULL, "array_state"); |
3640 | */ | 3784 | return 0; |
3641 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | ||
3642 | md_wakeup_thread(mddev->thread); | ||
3643 | md_wakeup_thread(mddev->sync_thread); | ||
3644 | err = 0; | ||
3645 | } else | ||
3646 | err = -EINVAL; | ||
3647 | |||
3648 | out: | ||
3649 | return err; | ||
3650 | } | 3785 | } |
3651 | 3786 | ||
3652 | /* similar to deny_write_access, but accounts for our holding a reference | 3787 | /* similar to deny_write_access, but accounts for our holding a reference |
@@ -3680,16 +3815,17 @@ static void restore_bitmap_write_access(struct file *file) | |||
3680 | * 1 - switch to readonly | 3815 | * 1 - switch to readonly |
3681 | * 2 - stop but do not disassemble array | 3816 | * 2 - stop but do not disassemble array |
3682 | */ | 3817 | */ |
3683 | static int do_md_stop(mddev_t * mddev, int mode) | 3818 | static int do_md_stop(mddev_t * mddev, int mode, int is_open) |
3684 | { | 3819 | { |
3685 | int err = 0; | 3820 | int err = 0; |
3686 | struct gendisk *disk = mddev->gendisk; | 3821 | struct gendisk *disk = mddev->gendisk; |
3687 | 3822 | ||
3823 | if (atomic_read(&mddev->openers) > is_open) { | ||
3824 | printk("md: %s still in use.\n",mdname(mddev)); | ||
3825 | return -EBUSY; | ||
3826 | } | ||
3827 | |||
3688 | if (mddev->pers) { | 3828 | if (mddev->pers) { |
3689 | if (atomic_read(&mddev->active)>2) { | ||
3690 | printk("md: %s still in use.\n",mdname(mddev)); | ||
3691 | return -EBUSY; | ||
3692 | } | ||
3693 | 3829 | ||
3694 | if (mddev->sync_thread) { | 3830 | if (mddev->sync_thread) { |
3695 | set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); | 3831 | set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); |
@@ -3773,10 +3909,11 @@ static int do_md_stop(mddev_t * mddev, int mode) | |||
3773 | 3909 | ||
3774 | export_array(mddev); | 3910 | export_array(mddev); |
3775 | 3911 | ||
3776 | mddev->array_size = 0; | 3912 | mddev->array_sectors = 0; |
3777 | mddev->size = 0; | 3913 | mddev->size = 0; |
3778 | mddev->raid_disks = 0; | 3914 | mddev->raid_disks = 0; |
3779 | mddev->recovery_cp = 0; | 3915 | mddev->recovery_cp = 0; |
3916 | mddev->resync_min = 0; | ||
3780 | mddev->resync_max = MaxSector; | 3917 | mddev->resync_max = MaxSector; |
3781 | mddev->reshape_position = MaxSector; | 3918 | mddev->reshape_position = MaxSector; |
3782 | mddev->external = 0; | 3919 | mddev->external = 0; |
@@ -3811,6 +3948,7 @@ static int do_md_stop(mddev_t * mddev, int mode) | |||
3811 | mdname(mddev)); | 3948 | mdname(mddev)); |
3812 | err = 0; | 3949 | err = 0; |
3813 | md_new_event(mddev); | 3950 | md_new_event(mddev); |
3951 | sysfs_notify(&mddev->kobj, NULL, "array_state"); | ||
3814 | out: | 3952 | out: |
3815 | return err; | 3953 | return err; |
3816 | } | 3954 | } |
@@ -3836,7 +3974,7 @@ static void autorun_array(mddev_t *mddev) | |||
3836 | err = do_md_run (mddev); | 3974 | err = do_md_run (mddev); |
3837 | if (err) { | 3975 | if (err) { |
3838 | printk(KERN_WARNING "md: do_md_run() returned %d\n", err); | 3976 | printk(KERN_WARNING "md: do_md_run() returned %d\n", err); |
3839 | do_md_stop (mddev, 0); | 3977 | do_md_stop (mddev, 0, 0); |
3840 | } | 3978 | } |
3841 | } | 3979 | } |
3842 | 3980 | ||
@@ -3927,8 +4065,10 @@ static void autorun_devices(int part) | |||
3927 | /* on success, candidates will be empty, on error | 4065 | /* on success, candidates will be empty, on error |
3928 | * it won't... | 4066 | * it won't... |
3929 | */ | 4067 | */ |
3930 | rdev_for_each_list(rdev, tmp, candidates) | 4068 | rdev_for_each_list(rdev, tmp, candidates) { |
4069 | list_del_init(&rdev->same_set); | ||
3931 | export_rdev(rdev); | 4070 | export_rdev(rdev); |
4071 | } | ||
3932 | mddev_put(mddev); | 4072 | mddev_put(mddev); |
3933 | } | 4073 | } |
3934 | printk(KERN_INFO "md: ... autorun DONE.\n"); | 4074 | printk(KERN_INFO "md: ... autorun DONE.\n"); |
@@ -4009,9 +4149,11 @@ static int get_bitmap_file(mddev_t * mddev, void __user * arg) | |||
4009 | char *ptr, *buf = NULL; | 4149 | char *ptr, *buf = NULL; |
4010 | int err = -ENOMEM; | 4150 | int err = -ENOMEM; |
4011 | 4151 | ||
4012 | md_allow_write(mddev); | 4152 | if (md_allow_write(mddev)) |
4153 | file = kmalloc(sizeof(*file), GFP_NOIO); | ||
4154 | else | ||
4155 | file = kmalloc(sizeof(*file), GFP_KERNEL); | ||
4013 | 4156 | ||
4014 | file = kmalloc(sizeof(*file), GFP_KERNEL); | ||
4015 | if (!file) | 4157 | if (!file) |
4016 | goto out; | 4158 | goto out; |
4017 | 4159 | ||
@@ -4044,15 +4186,12 @@ out: | |||
4044 | static int get_disk_info(mddev_t * mddev, void __user * arg) | 4186 | static int get_disk_info(mddev_t * mddev, void __user * arg) |
4045 | { | 4187 | { |
4046 | mdu_disk_info_t info; | 4188 | mdu_disk_info_t info; |
4047 | unsigned int nr; | ||
4048 | mdk_rdev_t *rdev; | 4189 | mdk_rdev_t *rdev; |
4049 | 4190 | ||
4050 | if (copy_from_user(&info, arg, sizeof(info))) | 4191 | if (copy_from_user(&info, arg, sizeof(info))) |
4051 | return -EFAULT; | 4192 | return -EFAULT; |
4052 | 4193 | ||
4053 | nr = info.number; | 4194 | rdev = find_rdev_nr(mddev, info.number); |
4054 | |||
4055 | rdev = find_rdev_nr(mddev, nr); | ||
4056 | if (rdev) { | 4195 | if (rdev) { |
4057 | info.major = MAJOR(rdev->bdev->bd_dev); | 4196 | info.major = MAJOR(rdev->bdev->bd_dev); |
4058 | info.minor = MINOR(rdev->bdev->bd_dev); | 4197 | info.minor = MINOR(rdev->bdev->bd_dev); |
@@ -4172,8 +4311,12 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) | |||
4172 | } | 4311 | } |
4173 | if (err) | 4312 | if (err) |
4174 | export_rdev(rdev); | 4313 | export_rdev(rdev); |
4314 | else | ||
4315 | sysfs_notify(&rdev->kobj, NULL, "state"); | ||
4175 | 4316 | ||
4176 | md_update_sb(mddev, 1); | 4317 | md_update_sb(mddev, 1); |
4318 | if (mddev->degraded) | ||
4319 | set_bit(MD_RECOVERY_RECOVER, &mddev->recovery); | ||
4177 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 4320 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
4178 | md_wakeup_thread(mddev->thread); | 4321 | md_wakeup_thread(mddev->thread); |
4179 | return err; | 4322 | return err; |
@@ -4212,10 +4355,10 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) | |||
4212 | 4355 | ||
4213 | if (!mddev->persistent) { | 4356 | if (!mddev->persistent) { |
4214 | printk(KERN_INFO "md: nonpersistent superblock ...\n"); | 4357 | printk(KERN_INFO "md: nonpersistent superblock ...\n"); |
4215 | rdev->sb_offset = rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS; | 4358 | rdev->sb_start = rdev->bdev->bd_inode->i_size / 512; |
4216 | } else | 4359 | } else |
4217 | rdev->sb_offset = calc_dev_sboffset(rdev->bdev); | 4360 | rdev->sb_start = calc_dev_sboffset(rdev->bdev); |
4218 | rdev->size = calc_dev_size(rdev, mddev->chunk_size); | 4361 | rdev->size = calc_num_sectors(rdev, mddev->chunk_size) / 2; |
4219 | 4362 | ||
4220 | err = bind_rdev_to_array(rdev, mddev); | 4363 | err = bind_rdev_to_array(rdev, mddev); |
4221 | if (err) { | 4364 | if (err) { |
@@ -4232,9 +4375,6 @@ static int hot_remove_disk(mddev_t * mddev, dev_t dev) | |||
4232 | char b[BDEVNAME_SIZE]; | 4375 | char b[BDEVNAME_SIZE]; |
4233 | mdk_rdev_t *rdev; | 4376 | mdk_rdev_t *rdev; |
4234 | 4377 | ||
4235 | if (!mddev->pers) | ||
4236 | return -ENODEV; | ||
4237 | |||
4238 | rdev = find_rdev(mddev, dev); | 4378 | rdev = find_rdev(mddev, dev); |
4239 | if (!rdev) | 4379 | if (!rdev) |
4240 | return -ENXIO; | 4380 | return -ENXIO; |
@@ -4257,7 +4397,6 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev) | |||
4257 | { | 4397 | { |
4258 | char b[BDEVNAME_SIZE]; | 4398 | char b[BDEVNAME_SIZE]; |
4259 | int err; | 4399 | int err; |
4260 | unsigned int size; | ||
4261 | mdk_rdev_t *rdev; | 4400 | mdk_rdev_t *rdev; |
4262 | 4401 | ||
4263 | if (!mddev->pers) | 4402 | if (!mddev->pers) |
@@ -4285,13 +4424,11 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev) | |||
4285 | } | 4424 | } |
4286 | 4425 | ||
4287 | if (mddev->persistent) | 4426 | if (mddev->persistent) |
4288 | rdev->sb_offset = calc_dev_sboffset(rdev->bdev); | 4427 | rdev->sb_start = calc_dev_sboffset(rdev->bdev); |
4289 | else | 4428 | else |
4290 | rdev->sb_offset = | 4429 | rdev->sb_start = rdev->bdev->bd_inode->i_size / 512; |
4291 | rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS; | ||
4292 | 4430 | ||
4293 | size = calc_dev_size(rdev, mddev->chunk_size); | 4431 | rdev->size = calc_num_sectors(rdev, mddev->chunk_size) / 2; |
4294 | rdev->size = size; | ||
4295 | 4432 | ||
4296 | if (test_bit(Faulty, &rdev->flags)) { | 4433 | if (test_bit(Faulty, &rdev->flags)) { |
4297 | printk(KERN_WARNING | 4434 | printk(KERN_WARNING |
@@ -4476,24 +4613,24 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info) | |||
4476 | return 0; | 4613 | return 0; |
4477 | } | 4614 | } |
4478 | 4615 | ||
4479 | static int update_size(mddev_t *mddev, unsigned long size) | 4616 | static int update_size(mddev_t *mddev, sector_t num_sectors) |
4480 | { | 4617 | { |
4481 | mdk_rdev_t * rdev; | 4618 | mdk_rdev_t * rdev; |
4482 | int rv; | 4619 | int rv; |
4483 | struct list_head *tmp; | 4620 | struct list_head *tmp; |
4484 | int fit = (size == 0); | 4621 | int fit = (num_sectors == 0); |
4485 | 4622 | ||
4486 | if (mddev->pers->resize == NULL) | 4623 | if (mddev->pers->resize == NULL) |
4487 | return -EINVAL; | 4624 | return -EINVAL; |
4488 | /* The "size" is the amount of each device that is used. | 4625 | /* The "num_sectors" is the number of sectors of each device that |
4489 | * This can only make sense for arrays with redundancy. | 4626 | * is used. This can only make sense for arrays with redundancy. |
4490 | * linear and raid0 always use whatever space is available | 4627 | * linear and raid0 always use whatever space is available. We can only |
4491 | * We can only consider changing the size if no resync | 4628 | * consider changing this number if no resync or reconstruction is |
4492 | * or reconstruction is happening, and if the new size | 4629 | * happening, and if the new size is acceptable. It must fit before the |
4493 | * is acceptable. It must fit before the sb_offset or, | 4630 | * sb_start or, if that is <data_offset, it must fit before the size |
4494 | * if that is <data_offset, it must fit before the | 4631 | * of each device. If num_sectors is zero, we find the largest size |
4495 | * size of each device. | 4632 | * that fits. |
4496 | * If size is zero, we find the largest size that fits. | 4633 | |
4497 | */ | 4634 | */ |
4498 | if (mddev->sync_thread) | 4635 | if (mddev->sync_thread) |
4499 | return -EBUSY; | 4636 | return -EBUSY; |
@@ -4501,19 +4638,20 @@ static int update_size(mddev_t *mddev, unsigned long size) | |||
4501 | sector_t avail; | 4638 | sector_t avail; |
4502 | avail = rdev->size * 2; | 4639 | avail = rdev->size * 2; |
4503 | 4640 | ||
4504 | if (fit && (size == 0 || size > avail/2)) | 4641 | if (fit && (num_sectors == 0 || num_sectors > avail)) |
4505 | size = avail/2; | 4642 | num_sectors = avail; |
4506 | if (avail < ((sector_t)size << 1)) | 4643 | if (avail < num_sectors) |
4507 | return -ENOSPC; | 4644 | return -ENOSPC; |
4508 | } | 4645 | } |
4509 | rv = mddev->pers->resize(mddev, (sector_t)size *2); | 4646 | rv = mddev->pers->resize(mddev, num_sectors); |
4510 | if (!rv) { | 4647 | if (!rv) { |
4511 | struct block_device *bdev; | 4648 | struct block_device *bdev; |
4512 | 4649 | ||
4513 | bdev = bdget_disk(mddev->gendisk, 0); | 4650 | bdev = bdget_disk(mddev->gendisk, 0); |
4514 | if (bdev) { | 4651 | if (bdev) { |
4515 | mutex_lock(&bdev->bd_inode->i_mutex); | 4652 | mutex_lock(&bdev->bd_inode->i_mutex); |
4516 | i_size_write(bdev->bd_inode, (loff_t)mddev->array_size << 10); | 4653 | i_size_write(bdev->bd_inode, |
4654 | (loff_t)mddev->array_sectors << 9); | ||
4517 | mutex_unlock(&bdev->bd_inode->i_mutex); | 4655 | mutex_unlock(&bdev->bd_inode->i_mutex); |
4518 | bdput(bdev); | 4656 | bdput(bdev); |
4519 | } | 4657 | } |
@@ -4588,7 +4726,7 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info) | |||
4588 | return mddev->pers->reconfig(mddev, info->layout, -1); | 4726 | return mddev->pers->reconfig(mddev, info->layout, -1); |
4589 | } | 4727 | } |
4590 | if (info->size >= 0 && mddev->size != info->size) | 4728 | if (info->size >= 0 && mddev->size != info->size) |
4591 | rv = update_size(mddev, info->size); | 4729 | rv = update_size(mddev, (sector_t)info->size * 2); |
4592 | 4730 | ||
4593 | if (mddev->raid_disks != info->raid_disks) | 4731 | if (mddev->raid_disks != info->raid_disks) |
4594 | rv = update_raid_disks(mddev, info->raid_disks); | 4732 | rv = update_raid_disks(mddev, info->raid_disks); |
@@ -4641,6 +4779,12 @@ static int set_disk_faulty(mddev_t *mddev, dev_t dev) | |||
4641 | return 0; | 4779 | return 0; |
4642 | } | 4780 | } |
4643 | 4781 | ||
4782 | /* | ||
4783 | * We have a problem here : there is no easy way to give a CHS | ||
4784 | * virtual geometry. We currently pretend that we have a 2 heads | ||
4785 | * 4 sectors (with a BIG number of cylinders...). This drives | ||
4786 | * dosfs just mad... ;-) | ||
4787 | */ | ||
4644 | static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo) | 4788 | static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo) |
4645 | { | 4789 | { |
4646 | mddev_t *mddev = bdev->bd_disk->private_data; | 4790 | mddev_t *mddev = bdev->bd_disk->private_data; |
@@ -4785,19 +4929,13 @@ static int md_ioctl(struct inode *inode, struct file *file, | |||
4785 | goto done_unlock; | 4929 | goto done_unlock; |
4786 | 4930 | ||
4787 | case STOP_ARRAY: | 4931 | case STOP_ARRAY: |
4788 | err = do_md_stop (mddev, 0); | 4932 | err = do_md_stop (mddev, 0, 1); |
4789 | goto done_unlock; | 4933 | goto done_unlock; |
4790 | 4934 | ||
4791 | case STOP_ARRAY_RO: | 4935 | case STOP_ARRAY_RO: |
4792 | err = do_md_stop (mddev, 1); | 4936 | err = do_md_stop (mddev, 1, 1); |
4793 | goto done_unlock; | 4937 | goto done_unlock; |
4794 | 4938 | ||
4795 | /* | ||
4796 | * We have a problem here : there is no easy way to give a CHS | ||
4797 | * virtual geometry. We currently pretend that we have a 2 heads | ||
4798 | * 4 sectors (with a BIG number of cylinders...). This drives | ||
4799 | * dosfs just mad... ;-) | ||
4800 | */ | ||
4801 | } | 4939 | } |
4802 | 4940 | ||
4803 | /* | 4941 | /* |
@@ -4807,13 +4945,12 @@ static int md_ioctl(struct inode *inode, struct file *file, | |||
4807 | * here and hit the 'default' below, so only disallow | 4945 | * here and hit the 'default' below, so only disallow |
4808 | * 'md' ioctls, and switch to rw mode if started auto-readonly. | 4946 | * 'md' ioctls, and switch to rw mode if started auto-readonly. |
4809 | */ | 4947 | */ |
4810 | if (_IOC_TYPE(cmd) == MD_MAJOR && | 4948 | if (_IOC_TYPE(cmd) == MD_MAJOR && mddev->ro && mddev->pers) { |
4811 | mddev->ro && mddev->pers) { | ||
4812 | if (mddev->ro == 2) { | 4949 | if (mddev->ro == 2) { |
4813 | mddev->ro = 0; | 4950 | mddev->ro = 0; |
4814 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 4951 | sysfs_notify(&mddev->kobj, NULL, "array_state"); |
4815 | md_wakeup_thread(mddev->thread); | 4952 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
4816 | 4953 | md_wakeup_thread(mddev->thread); | |
4817 | } else { | 4954 | } else { |
4818 | err = -EROFS; | 4955 | err = -EROFS; |
4819 | goto abort_unlock; | 4956 | goto abort_unlock; |
@@ -4883,6 +5020,7 @@ static int md_open(struct inode *inode, struct file *file) | |||
4883 | 5020 | ||
4884 | err = 0; | 5021 | err = 0; |
4885 | mddev_get(mddev); | 5022 | mddev_get(mddev); |
5023 | atomic_inc(&mddev->openers); | ||
4886 | mddev_unlock(mddev); | 5024 | mddev_unlock(mddev); |
4887 | 5025 | ||
4888 | check_disk_change(inode->i_bdev); | 5026 | check_disk_change(inode->i_bdev); |
@@ -4895,6 +5033,7 @@ static int md_release(struct inode *inode, struct file * file) | |||
4895 | mddev_t *mddev = inode->i_bdev->bd_disk->private_data; | 5033 | mddev_t *mddev = inode->i_bdev->bd_disk->private_data; |
4896 | 5034 | ||
4897 | BUG_ON(!mddev); | 5035 | BUG_ON(!mddev); |
5036 | atomic_dec(&mddev->openers); | ||
4898 | mddev_put(mddev); | 5037 | mddev_put(mddev); |
4899 | 5038 | ||
4900 | return 0; | 5039 | return 0; |
@@ -5029,6 +5168,9 @@ void md_error(mddev_t *mddev, mdk_rdev_t *rdev) | |||
5029 | if (!mddev->pers->error_handler) | 5168 | if (!mddev->pers->error_handler) |
5030 | return; | 5169 | return; |
5031 | mddev->pers->error_handler(mddev,rdev); | 5170 | mddev->pers->error_handler(mddev,rdev); |
5171 | if (mddev->degraded) | ||
5172 | set_bit(MD_RECOVERY_RECOVER, &mddev->recovery); | ||
5173 | set_bit(StateChanged, &rdev->flags); | ||
5032 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); | 5174 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); |
5033 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 5175 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
5034 | md_wakeup_thread(mddev->thread); | 5176 | md_wakeup_thread(mddev->thread); |
@@ -5258,10 +5400,11 @@ static int md_seq_show(struct seq_file *seq, void *v) | |||
5258 | if (!list_empty(&mddev->disks)) { | 5400 | if (!list_empty(&mddev->disks)) { |
5259 | if (mddev->pers) | 5401 | if (mddev->pers) |
5260 | seq_printf(seq, "\n %llu blocks", | 5402 | seq_printf(seq, "\n %llu blocks", |
5261 | (unsigned long long)mddev->array_size); | 5403 | (unsigned long long) |
5404 | mddev->array_sectors / 2); | ||
5262 | else | 5405 | else |
5263 | seq_printf(seq, "\n %llu blocks", | 5406 | seq_printf(seq, "\n %llu blocks", |
5264 | (unsigned long long)size); | 5407 | (unsigned long long)size); |
5265 | } | 5408 | } |
5266 | if (mddev->persistent) { | 5409 | if (mddev->persistent) { |
5267 | if (mddev->major_version != 0 || | 5410 | if (mddev->major_version != 0 || |
@@ -5391,12 +5534,12 @@ int unregister_md_personality(struct mdk_personality *p) | |||
5391 | static int is_mddev_idle(mddev_t *mddev) | 5534 | static int is_mddev_idle(mddev_t *mddev) |
5392 | { | 5535 | { |
5393 | mdk_rdev_t * rdev; | 5536 | mdk_rdev_t * rdev; |
5394 | struct list_head *tmp; | ||
5395 | int idle; | 5537 | int idle; |
5396 | long curr_events; | 5538 | long curr_events; |
5397 | 5539 | ||
5398 | idle = 1; | 5540 | idle = 1; |
5399 | rdev_for_each(rdev, tmp, mddev) { | 5541 | rcu_read_lock(); |
5542 | rdev_for_each_rcu(rdev, mddev) { | ||
5400 | struct gendisk *disk = rdev->bdev->bd_contains->bd_disk; | 5543 | struct gendisk *disk = rdev->bdev->bd_contains->bd_disk; |
5401 | curr_events = disk_stat_read(disk, sectors[0]) + | 5544 | curr_events = disk_stat_read(disk, sectors[0]) + |
5402 | disk_stat_read(disk, sectors[1]) - | 5545 | disk_stat_read(disk, sectors[1]) - |
@@ -5428,6 +5571,7 @@ static int is_mddev_idle(mddev_t *mddev) | |||
5428 | idle = 0; | 5571 | idle = 0; |
5429 | } | 5572 | } |
5430 | } | 5573 | } |
5574 | rcu_read_unlock(); | ||
5431 | return idle; | 5575 | return idle; |
5432 | } | 5576 | } |
5433 | 5577 | ||
@@ -5451,6 +5595,7 @@ void md_done_sync(mddev_t *mddev, int blocks, int ok) | |||
5451 | */ | 5595 | */ |
5452 | void md_write_start(mddev_t *mddev, struct bio *bi) | 5596 | void md_write_start(mddev_t *mddev, struct bio *bi) |
5453 | { | 5597 | { |
5598 | int did_change = 0; | ||
5454 | if (bio_data_dir(bi) != WRITE) | 5599 | if (bio_data_dir(bi) != WRITE) |
5455 | return; | 5600 | return; |
5456 | 5601 | ||
@@ -5461,6 +5606,7 @@ void md_write_start(mddev_t *mddev, struct bio *bi) | |||
5461 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 5606 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
5462 | md_wakeup_thread(mddev->thread); | 5607 | md_wakeup_thread(mddev->thread); |
5463 | md_wakeup_thread(mddev->sync_thread); | 5608 | md_wakeup_thread(mddev->sync_thread); |
5609 | did_change = 1; | ||
5464 | } | 5610 | } |
5465 | atomic_inc(&mddev->writes_pending); | 5611 | atomic_inc(&mddev->writes_pending); |
5466 | if (mddev->safemode == 1) | 5612 | if (mddev->safemode == 1) |
@@ -5471,10 +5617,12 @@ void md_write_start(mddev_t *mddev, struct bio *bi) | |||
5471 | mddev->in_sync = 0; | 5617 | mddev->in_sync = 0; |
5472 | set_bit(MD_CHANGE_CLEAN, &mddev->flags); | 5618 | set_bit(MD_CHANGE_CLEAN, &mddev->flags); |
5473 | md_wakeup_thread(mddev->thread); | 5619 | md_wakeup_thread(mddev->thread); |
5620 | did_change = 1; | ||
5474 | } | 5621 | } |
5475 | spin_unlock_irq(&mddev->write_lock); | 5622 | spin_unlock_irq(&mddev->write_lock); |
5476 | sysfs_notify(&mddev->kobj, NULL, "array_state"); | ||
5477 | } | 5623 | } |
5624 | if (did_change) | ||
5625 | sysfs_notify(&mddev->kobj, NULL, "array_state"); | ||
5478 | wait_event(mddev->sb_wait, | 5626 | wait_event(mddev->sb_wait, |
5479 | !test_bit(MD_CHANGE_CLEAN, &mddev->flags) && | 5627 | !test_bit(MD_CHANGE_CLEAN, &mddev->flags) && |
5480 | !test_bit(MD_CHANGE_PENDING, &mddev->flags)); | 5628 | !test_bit(MD_CHANGE_PENDING, &mddev->flags)); |
@@ -5495,13 +5643,18 @@ void md_write_end(mddev_t *mddev) | |||
5495 | * may proceed without blocking. It is important to call this before | 5643 | * may proceed without blocking. It is important to call this before |
5496 | * attempting a GFP_KERNEL allocation while holding the mddev lock. | 5644 | * attempting a GFP_KERNEL allocation while holding the mddev lock. |
5497 | * Must be called with mddev_lock held. | 5645 | * Must be called with mddev_lock held. |
5646 | * | ||
5647 | * In the ->external case MD_CHANGE_CLEAN can not be cleared until mddev->lock | ||
5648 | * is dropped, so return -EAGAIN after notifying userspace. | ||
5498 | */ | 5649 | */ |
5499 | void md_allow_write(mddev_t *mddev) | 5650 | int md_allow_write(mddev_t *mddev) |
5500 | { | 5651 | { |
5501 | if (!mddev->pers) | 5652 | if (!mddev->pers) |
5502 | return; | 5653 | return 0; |
5503 | if (mddev->ro) | 5654 | if (mddev->ro) |
5504 | return; | 5655 | return 0; |
5656 | if (!mddev->pers->sync_request) | ||
5657 | return 0; | ||
5505 | 5658 | ||
5506 | spin_lock_irq(&mddev->write_lock); | 5659 | spin_lock_irq(&mddev->write_lock); |
5507 | if (mddev->in_sync) { | 5660 | if (mddev->in_sync) { |
@@ -5512,14 +5665,14 @@ void md_allow_write(mddev_t *mddev) | |||
5512 | mddev->safemode = 1; | 5665 | mddev->safemode = 1; |
5513 | spin_unlock_irq(&mddev->write_lock); | 5666 | spin_unlock_irq(&mddev->write_lock); |
5514 | md_update_sb(mddev, 0); | 5667 | md_update_sb(mddev, 0); |
5515 | |||
5516 | sysfs_notify(&mddev->kobj, NULL, "array_state"); | 5668 | sysfs_notify(&mddev->kobj, NULL, "array_state"); |
5517 | /* wait for the dirty state to be recorded in the metadata */ | ||
5518 | wait_event(mddev->sb_wait, | ||
5519 | !test_bit(MD_CHANGE_CLEAN, &mddev->flags) && | ||
5520 | !test_bit(MD_CHANGE_PENDING, &mddev->flags)); | ||
5521 | } else | 5669 | } else |
5522 | spin_unlock_irq(&mddev->write_lock); | 5670 | spin_unlock_irq(&mddev->write_lock); |
5671 | |||
5672 | if (test_bit(MD_CHANGE_CLEAN, &mddev->flags)) | ||
5673 | return -EAGAIN; | ||
5674 | else | ||
5675 | return 0; | ||
5523 | } | 5676 | } |
5524 | EXPORT_SYMBOL_GPL(md_allow_write); | 5677 | EXPORT_SYMBOL_GPL(md_allow_write); |
5525 | 5678 | ||
@@ -5625,9 +5778,11 @@ void md_do_sync(mddev_t *mddev) | |||
5625 | max_sectors = mddev->resync_max_sectors; | 5778 | max_sectors = mddev->resync_max_sectors; |
5626 | mddev->resync_mismatches = 0; | 5779 | mddev->resync_mismatches = 0; |
5627 | /* we don't use the checkpoint if there's a bitmap */ | 5780 | /* we don't use the checkpoint if there's a bitmap */ |
5628 | if (!mddev->bitmap && | 5781 | if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) |
5629 | !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) | 5782 | j = mddev->resync_min; |
5783 | else if (!mddev->bitmap) | ||
5630 | j = mddev->recovery_cp; | 5784 | j = mddev->recovery_cp; |
5785 | |||
5631 | } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) | 5786 | } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) |
5632 | max_sectors = mddev->size << 1; | 5787 | max_sectors = mddev->size << 1; |
5633 | else { | 5788 | else { |
@@ -5796,6 +5951,7 @@ void md_do_sync(mddev_t *mddev) | |||
5796 | 5951 | ||
5797 | skip: | 5952 | skip: |
5798 | mddev->curr_resync = 0; | 5953 | mddev->curr_resync = 0; |
5954 | mddev->resync_min = 0; | ||
5799 | mddev->resync_max = MaxSector; | 5955 | mddev->resync_max = MaxSector; |
5800 | sysfs_notify(&mddev->kobj, NULL, "sync_completed"); | 5956 | sysfs_notify(&mddev->kobj, NULL, "sync_completed"); |
5801 | wake_up(&resync_wait); | 5957 | wake_up(&resync_wait); |
@@ -5845,7 +6001,8 @@ static int remove_and_add_spares(mddev_t *mddev) | |||
5845 | if (rdev->raid_disk < 0 | 6001 | if (rdev->raid_disk < 0 |
5846 | && !test_bit(Faulty, &rdev->flags)) { | 6002 | && !test_bit(Faulty, &rdev->flags)) { |
5847 | rdev->recovery_offset = 0; | 6003 | rdev->recovery_offset = 0; |
5848 | if (mddev->pers->hot_add_disk(mddev,rdev)) { | 6004 | if (mddev->pers-> |
6005 | hot_add_disk(mddev, rdev) == 0) { | ||
5849 | char nm[20]; | 6006 | char nm[20]; |
5850 | sprintf(nm, "rd%d", rdev->raid_disk); | 6007 | sprintf(nm, "rd%d", rdev->raid_disk); |
5851 | if (sysfs_create_link(&mddev->kobj, | 6008 | if (sysfs_create_link(&mddev->kobj, |
@@ -5920,23 +6077,31 @@ void md_check_recovery(mddev_t *mddev) | |||
5920 | int spares = 0; | 6077 | int spares = 0; |
5921 | 6078 | ||
5922 | if (!mddev->external) { | 6079 | if (!mddev->external) { |
6080 | int did_change = 0; | ||
5923 | spin_lock_irq(&mddev->write_lock); | 6081 | spin_lock_irq(&mddev->write_lock); |
5924 | if (mddev->safemode && | 6082 | if (mddev->safemode && |
5925 | !atomic_read(&mddev->writes_pending) && | 6083 | !atomic_read(&mddev->writes_pending) && |
5926 | !mddev->in_sync && | 6084 | !mddev->in_sync && |
5927 | mddev->recovery_cp == MaxSector) { | 6085 | mddev->recovery_cp == MaxSector) { |
5928 | mddev->in_sync = 1; | 6086 | mddev->in_sync = 1; |
6087 | did_change = 1; | ||
5929 | if (mddev->persistent) | 6088 | if (mddev->persistent) |
5930 | set_bit(MD_CHANGE_CLEAN, &mddev->flags); | 6089 | set_bit(MD_CHANGE_CLEAN, &mddev->flags); |
5931 | } | 6090 | } |
5932 | if (mddev->safemode == 1) | 6091 | if (mddev->safemode == 1) |
5933 | mddev->safemode = 0; | 6092 | mddev->safemode = 0; |
5934 | spin_unlock_irq(&mddev->write_lock); | 6093 | spin_unlock_irq(&mddev->write_lock); |
6094 | if (did_change) | ||
6095 | sysfs_notify(&mddev->kobj, NULL, "array_state"); | ||
5935 | } | 6096 | } |
5936 | 6097 | ||
5937 | if (mddev->flags) | 6098 | if (mddev->flags) |
5938 | md_update_sb(mddev, 0); | 6099 | md_update_sb(mddev, 0); |
5939 | 6100 | ||
6101 | rdev_for_each(rdev, rtmp, mddev) | ||
6102 | if (test_and_clear_bit(StateChanged, &rdev->flags)) | ||
6103 | sysfs_notify(&rdev->kobj, NULL, "state"); | ||
6104 | |||
5940 | 6105 | ||
5941 | if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) && | 6106 | if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) && |
5942 | !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) { | 6107 | !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) { |
@@ -5951,7 +6116,9 @@ void md_check_recovery(mddev_t *mddev) | |||
5951 | if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { | 6116 | if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { |
5952 | /* success...*/ | 6117 | /* success...*/ |
5953 | /* activate any spares */ | 6118 | /* activate any spares */ |
5954 | mddev->pers->spare_active(mddev); | 6119 | if (mddev->pers->spare_active(mddev)) |
6120 | sysfs_notify(&mddev->kobj, NULL, | ||
6121 | "degraded"); | ||
5955 | } | 6122 | } |
5956 | md_update_sb(mddev, 1); | 6123 | md_update_sb(mddev, 1); |
5957 | 6124 | ||
@@ -5965,13 +6132,18 @@ void md_check_recovery(mddev_t *mddev) | |||
5965 | mddev->recovery = 0; | 6132 | mddev->recovery = 0; |
5966 | /* flag recovery needed just to double check */ | 6133 | /* flag recovery needed just to double check */ |
5967 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 6134 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
6135 | sysfs_notify(&mddev->kobj, NULL, "sync_action"); | ||
5968 | md_new_event(mddev); | 6136 | md_new_event(mddev); |
5969 | goto unlock; | 6137 | goto unlock; |
5970 | } | 6138 | } |
6139 | /* Set RUNNING before clearing NEEDED to avoid | ||
6140 | * any transients in the value of "sync_action". | ||
6141 | */ | ||
6142 | set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); | ||
6143 | clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | ||
5971 | /* Clear some bits that don't mean anything, but | 6144 | /* Clear some bits that don't mean anything, but |
5972 | * might be left set | 6145 | * might be left set |
5973 | */ | 6146 | */ |
5974 | clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | ||
5975 | clear_bit(MD_RECOVERY_INTR, &mddev->recovery); | 6147 | clear_bit(MD_RECOVERY_INTR, &mddev->recovery); |
5976 | clear_bit(MD_RECOVERY_DONE, &mddev->recovery); | 6148 | clear_bit(MD_RECOVERY_DONE, &mddev->recovery); |
5977 | 6149 | ||
@@ -5989,17 +6161,19 @@ void md_check_recovery(mddev_t *mddev) | |||
5989 | /* Cannot proceed */ | 6161 | /* Cannot proceed */ |
5990 | goto unlock; | 6162 | goto unlock; |
5991 | set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); | 6163 | set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); |
6164 | clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery); | ||
5992 | } else if ((spares = remove_and_add_spares(mddev))) { | 6165 | } else if ((spares = remove_and_add_spares(mddev))) { |
5993 | clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); | 6166 | clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); |
5994 | clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); | 6167 | clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); |
6168 | set_bit(MD_RECOVERY_RECOVER, &mddev->recovery); | ||
5995 | } else if (mddev->recovery_cp < MaxSector) { | 6169 | } else if (mddev->recovery_cp < MaxSector) { |
5996 | set_bit(MD_RECOVERY_SYNC, &mddev->recovery); | 6170 | set_bit(MD_RECOVERY_SYNC, &mddev->recovery); |
6171 | clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery); | ||
5997 | } else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) | 6172 | } else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) |
5998 | /* nothing to be done ... */ | 6173 | /* nothing to be done ... */ |
5999 | goto unlock; | 6174 | goto unlock; |
6000 | 6175 | ||
6001 | if (mddev->pers->sync_request) { | 6176 | if (mddev->pers->sync_request) { |
6002 | set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); | ||
6003 | if (spares && mddev->bitmap && ! mddev->bitmap->file) { | 6177 | if (spares && mddev->bitmap && ! mddev->bitmap->file) { |
6004 | /* We are adding a device or devices to an array | 6178 | /* We are adding a device or devices to an array |
6005 | * which has the bitmap stored on all devices. | 6179 | * which has the bitmap stored on all devices. |
@@ -6018,9 +6192,16 @@ void md_check_recovery(mddev_t *mddev) | |||
6018 | mddev->recovery = 0; | 6192 | mddev->recovery = 0; |
6019 | } else | 6193 | } else |
6020 | md_wakeup_thread(mddev->sync_thread); | 6194 | md_wakeup_thread(mddev->sync_thread); |
6195 | sysfs_notify(&mddev->kobj, NULL, "sync_action"); | ||
6021 | md_new_event(mddev); | 6196 | md_new_event(mddev); |
6022 | } | 6197 | } |
6023 | unlock: | 6198 | unlock: |
6199 | if (!mddev->sync_thread) { | ||
6200 | clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery); | ||
6201 | if (test_and_clear_bit(MD_RECOVERY_RECOVER, | ||
6202 | &mddev->recovery)) | ||
6203 | sysfs_notify(&mddev->kobj, NULL, "sync_action"); | ||
6204 | } | ||
6024 | mddev_unlock(mddev); | 6205 | mddev_unlock(mddev); |
6025 | } | 6206 | } |
6026 | } | 6207 | } |
@@ -6047,7 +6228,7 @@ static int md_notify_reboot(struct notifier_block *this, | |||
6047 | 6228 | ||
6048 | for_each_mddev(mddev, tmp) | 6229 | for_each_mddev(mddev, tmp) |
6049 | if (mddev_trylock(mddev)) { | 6230 | if (mddev_trylock(mddev)) { |
6050 | do_md_stop (mddev, 1); | 6231 | do_md_stop (mddev, 1, 0); |
6051 | mddev_unlock(mddev); | 6232 | mddev_unlock(mddev); |
6052 | } | 6233 | } |
6053 | /* | 6234 | /* |