diff options
Diffstat (limited to 'drivers/md')
| -rw-r--r-- | drivers/md/md.c | 46 | ||||
| -rw-r--r-- | drivers/md/raid1.c | 7 | ||||
| -rw-r--r-- | drivers/md/raid5.c | 87 |
3 files changed, 121 insertions, 19 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index 10eb1fce975e..b182f86a19dd 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
| @@ -944,6 +944,14 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
| 944 | desc->raid_disk < mddev->raid_disks */) { | 944 | desc->raid_disk < mddev->raid_disks */) { |
| 945 | set_bit(In_sync, &rdev->flags); | 945 | set_bit(In_sync, &rdev->flags); |
| 946 | rdev->raid_disk = desc->raid_disk; | 946 | rdev->raid_disk = desc->raid_disk; |
| 947 | } else if (desc->state & (1<<MD_DISK_ACTIVE)) { | ||
| 948 | /* active but not in sync implies recovery up to | ||
| 949 | * reshape position. We don't know exactly where | ||
| 950 | * that is, so set to zero for now */ | ||
| 951 | if (mddev->minor_version >= 91) { | ||
| 952 | rdev->recovery_offset = 0; | ||
| 953 | rdev->raid_disk = desc->raid_disk; | ||
| 954 | } | ||
| 947 | } | 955 | } |
| 948 | if (desc->state & (1<<MD_DISK_WRITEMOSTLY)) | 956 | if (desc->state & (1<<MD_DISK_WRITEMOSTLY)) |
| 949 | set_bit(WriteMostly, &rdev->flags); | 957 | set_bit(WriteMostly, &rdev->flags); |
| @@ -1032,8 +1040,19 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
| 1032 | list_for_each_entry(rdev2, &mddev->disks, same_set) { | 1040 | list_for_each_entry(rdev2, &mddev->disks, same_set) { |
| 1033 | mdp_disk_t *d; | 1041 | mdp_disk_t *d; |
| 1034 | int desc_nr; | 1042 | int desc_nr; |
| 1035 | if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags) | 1043 | int is_active = test_bit(In_sync, &rdev2->flags); |
| 1036 | && !test_bit(Faulty, &rdev2->flags)) | 1044 | |
| 1045 | if (rdev2->raid_disk >= 0 && | ||
| 1046 | sb->minor_version >= 91) | ||
| 1047 | /* we have nowhere to store the recovery_offset, | ||
| 1048 | * but if it is not below the reshape_position, | ||
| 1049 | * we can piggy-back on that. | ||
| 1050 | */ | ||
| 1051 | is_active = 1; | ||
| 1052 | if (rdev2->raid_disk < 0 || | ||
| 1053 | test_bit(Faulty, &rdev2->flags)) | ||
| 1054 | is_active = 0; | ||
| 1055 | if (is_active) | ||
| 1037 | desc_nr = rdev2->raid_disk; | 1056 | desc_nr = rdev2->raid_disk; |
| 1038 | else | 1057 | else |
| 1039 | desc_nr = next_spare++; | 1058 | desc_nr = next_spare++; |
| @@ -1043,16 +1062,16 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
| 1043 | d->number = rdev2->desc_nr; | 1062 | d->number = rdev2->desc_nr; |
| 1044 | d->major = MAJOR(rdev2->bdev->bd_dev); | 1063 | d->major = MAJOR(rdev2->bdev->bd_dev); |
| 1045 | d->minor = MINOR(rdev2->bdev->bd_dev); | 1064 | d->minor = MINOR(rdev2->bdev->bd_dev); |
| 1046 | if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags) | 1065 | if (is_active) |
| 1047 | && !test_bit(Faulty, &rdev2->flags)) | ||
| 1048 | d->raid_disk = rdev2->raid_disk; | 1066 | d->raid_disk = rdev2->raid_disk; |
| 1049 | else | 1067 | else |
| 1050 | d->raid_disk = rdev2->desc_nr; /* compatibility */ | 1068 | d->raid_disk = rdev2->desc_nr; /* compatibility */ |
| 1051 | if (test_bit(Faulty, &rdev2->flags)) | 1069 | if (test_bit(Faulty, &rdev2->flags)) |
| 1052 | d->state = (1<<MD_DISK_FAULTY); | 1070 | d->state = (1<<MD_DISK_FAULTY); |
| 1053 | else if (test_bit(In_sync, &rdev2->flags)) { | 1071 | else if (is_active) { |
| 1054 | d->state = (1<<MD_DISK_ACTIVE); | 1072 | d->state = (1<<MD_DISK_ACTIVE); |
| 1055 | d->state |= (1<<MD_DISK_SYNC); | 1073 | if (test_bit(In_sync, &rdev2->flags)) |
| 1074 | d->state |= (1<<MD_DISK_SYNC); | ||
| 1056 | active++; | 1075 | active++; |
| 1057 | working++; | 1076 | working++; |
| 1058 | } else { | 1077 | } else { |
| @@ -1382,8 +1401,6 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
| 1382 | 1401 | ||
| 1383 | if (rdev->raid_disk >= 0 && | 1402 | if (rdev->raid_disk >= 0 && |
| 1384 | !test_bit(In_sync, &rdev->flags)) { | 1403 | !test_bit(In_sync, &rdev->flags)) { |
| 1385 | if (mddev->curr_resync_completed > rdev->recovery_offset) | ||
| 1386 | rdev->recovery_offset = mddev->curr_resync_completed; | ||
| 1387 | if (rdev->recovery_offset > 0) { | 1404 | if (rdev->recovery_offset > 0) { |
| 1388 | sb->feature_map |= | 1405 | sb->feature_map |= |
| 1389 | cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET); | 1406 | cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET); |
| @@ -1917,6 +1934,14 @@ static void sync_sbs(mddev_t * mddev, int nospares) | |||
| 1917 | */ | 1934 | */ |
| 1918 | mdk_rdev_t *rdev; | 1935 | mdk_rdev_t *rdev; |
| 1919 | 1936 | ||
| 1937 | /* First make sure individual recovery_offsets are correct */ | ||
| 1938 | list_for_each_entry(rdev, &mddev->disks, same_set) { | ||
| 1939 | if (rdev->raid_disk >= 0 && | ||
| 1940 | !test_bit(In_sync, &rdev->flags) && | ||
| 1941 | mddev->curr_resync_completed > rdev->recovery_offset) | ||
| 1942 | rdev->recovery_offset = mddev->curr_resync_completed; | ||
| 1943 | |||
| 1944 | } | ||
| 1920 | list_for_each_entry(rdev, &mddev->disks, same_set) { | 1945 | list_for_each_entry(rdev, &mddev->disks, same_set) { |
| 1921 | if (rdev->sb_events == mddev->events || | 1946 | if (rdev->sb_events == mddev->events || |
| 1922 | (nospares && | 1947 | (nospares && |
| @@ -6504,8 +6529,9 @@ void md_do_sync(mddev_t *mddev) | |||
| 6504 | skip: | 6529 | skip: |
| 6505 | mddev->curr_resync = 0; | 6530 | mddev->curr_resync = 0; |
| 6506 | mddev->curr_resync_completed = 0; | 6531 | mddev->curr_resync_completed = 0; |
| 6507 | mddev->resync_min = 0; | 6532 | if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) |
| 6508 | mddev->resync_max = MaxSector; | 6533 | /* We completed so max setting can be forgotten. */ |
| 6534 | mddev->resync_max = MaxSector; | ||
| 6509 | sysfs_notify(&mddev->kobj, NULL, "sync_completed"); | 6535 | sysfs_notify(&mddev->kobj, NULL, "sync_completed"); |
| 6510 | wake_up(&resync_wait); | 6536 | wake_up(&resync_wait); |
| 6511 | set_bit(MD_RECOVERY_DONE, &mddev->recovery); | 6537 | set_bit(MD_RECOVERY_DONE, &mddev->recovery); |
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index a053423785c9..e07ce2e033a9 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
| @@ -1650,11 +1650,12 @@ static void raid1d(mddev_t *mddev) | |||
| 1650 | r1_bio->sector, | 1650 | r1_bio->sector, |
| 1651 | r1_bio->sectors); | 1651 | r1_bio->sectors); |
| 1652 | unfreeze_array(conf); | 1652 | unfreeze_array(conf); |
| 1653 | } | 1653 | } else |
| 1654 | md_error(mddev, | ||
| 1655 | conf->mirrors[r1_bio->read_disk].rdev); | ||
| 1654 | 1656 | ||
| 1655 | bio = r1_bio->bios[r1_bio->read_disk]; | 1657 | bio = r1_bio->bios[r1_bio->read_disk]; |
| 1656 | if ((disk=read_balance(conf, r1_bio)) == -1 || | 1658 | if ((disk=read_balance(conf, r1_bio)) == -1) { |
| 1657 | disk == r1_bio->read_disk) { | ||
| 1658 | printk(KERN_ALERT "raid1: %s: unrecoverable I/O" | 1659 | printk(KERN_ALERT "raid1: %s: unrecoverable I/O" |
| 1659 | " read error for block %llu\n", | 1660 | " read error for block %llu\n", |
| 1660 | bdevname(bio->bi_bdev,b), | 1661 | bdevname(bio->bi_bdev,b), |
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 81abefc172d9..d29215d966da 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
| @@ -4049,6 +4049,8 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped | |||
| 4049 | sector_nr = conf->reshape_progress; | 4049 | sector_nr = conf->reshape_progress; |
| 4050 | sector_div(sector_nr, new_data_disks); | 4050 | sector_div(sector_nr, new_data_disks); |
| 4051 | if (sector_nr) { | 4051 | if (sector_nr) { |
| 4052 | mddev->curr_resync_completed = sector_nr; | ||
| 4053 | sysfs_notify(&mddev->kobj, NULL, "sync_completed"); | ||
| 4052 | *skipped = 1; | 4054 | *skipped = 1; |
| 4053 | return sector_nr; | 4055 | return sector_nr; |
| 4054 | } | 4056 | } |
| @@ -4821,11 +4823,40 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) | |||
| 4821 | return ERR_PTR(-ENOMEM); | 4823 | return ERR_PTR(-ENOMEM); |
| 4822 | } | 4824 | } |
| 4823 | 4825 | ||
| 4826 | |||
| 4827 | static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded) | ||
| 4828 | { | ||
| 4829 | switch (algo) { | ||
| 4830 | case ALGORITHM_PARITY_0: | ||
| 4831 | if (raid_disk < max_degraded) | ||
| 4832 | return 1; | ||
| 4833 | break; | ||
| 4834 | case ALGORITHM_PARITY_N: | ||
| 4835 | if (raid_disk >= raid_disks - max_degraded) | ||
| 4836 | return 1; | ||
| 4837 | break; | ||
| 4838 | case ALGORITHM_PARITY_0_6: | ||
| 4839 | if (raid_disk == 0 || | ||
| 4840 | raid_disk == raid_disks - 1) | ||
| 4841 | return 1; | ||
| 4842 | break; | ||
| 4843 | case ALGORITHM_LEFT_ASYMMETRIC_6: | ||
| 4844 | case ALGORITHM_RIGHT_ASYMMETRIC_6: | ||
| 4845 | case ALGORITHM_LEFT_SYMMETRIC_6: | ||
| 4846 | case ALGORITHM_RIGHT_SYMMETRIC_6: | ||
| 4847 | if (raid_disk == raid_disks - 1) | ||
| 4848 | return 1; | ||
| 4849 | } | ||
| 4850 | return 0; | ||
| 4851 | } | ||
| 4852 | |||
| 4824 | static int run(mddev_t *mddev) | 4853 | static int run(mddev_t *mddev) |
| 4825 | { | 4854 | { |
| 4826 | raid5_conf_t *conf; | 4855 | raid5_conf_t *conf; |
| 4827 | int working_disks = 0, chunk_size; | 4856 | int working_disks = 0, chunk_size; |
| 4857 | int dirty_parity_disks = 0; | ||
| 4828 | mdk_rdev_t *rdev; | 4858 | mdk_rdev_t *rdev; |
| 4859 | sector_t reshape_offset = 0; | ||
| 4829 | 4860 | ||
| 4830 | if (mddev->recovery_cp != MaxSector) | 4861 | if (mddev->recovery_cp != MaxSector) |
| 4831 | printk(KERN_NOTICE "raid5: %s is not clean" | 4862 | printk(KERN_NOTICE "raid5: %s is not clean" |
| @@ -4859,6 +4890,7 @@ static int run(mddev_t *mddev) | |||
| 4859 | "on a stripe boundary\n"); | 4890 | "on a stripe boundary\n"); |
| 4860 | return -EINVAL; | 4891 | return -EINVAL; |
| 4861 | } | 4892 | } |
| 4893 | reshape_offset = here_new * mddev->new_chunk_sectors; | ||
| 4862 | /* here_new is the stripe we will write to */ | 4894 | /* here_new is the stripe we will write to */ |
| 4863 | here_old = mddev->reshape_position; | 4895 | here_old = mddev->reshape_position; |
| 4864 | sector_div(here_old, mddev->chunk_sectors * | 4896 | sector_div(here_old, mddev->chunk_sectors * |
| @@ -4914,10 +4946,51 @@ static int run(mddev_t *mddev) | |||
| 4914 | /* | 4946 | /* |
| 4915 | * 0 for a fully functional array, 1 or 2 for a degraded array. | 4947 | * 0 for a fully functional array, 1 or 2 for a degraded array. |
| 4916 | */ | 4948 | */ |
| 4917 | list_for_each_entry(rdev, &mddev->disks, same_set) | 4949 | list_for_each_entry(rdev, &mddev->disks, same_set) { |
| 4918 | if (rdev->raid_disk >= 0 && | 4950 | if (rdev->raid_disk < 0) |
| 4919 | test_bit(In_sync, &rdev->flags)) | 4951 | continue; |
| 4952 | if (test_bit(In_sync, &rdev->flags)) | ||
| 4920 | working_disks++; | 4953 | working_disks++; |
| 4954 | /* This disc is not fully in-sync. However if it | ||
| 4955 | * just stored parity (beyond the recovery_offset), | ||
| 4956 | * then we don't need to be concerned about the | ||
| 4957 | * array being dirty. | ||
| 4958 | * When reshape goes 'backwards', we never have | ||
| 4959 | * partially completed devices, so we only need | ||
| 4960 | * to worry about reshape going forwards. | ||
| 4961 | */ | ||
| 4962 | /* Hack because v0.91 doesn't store recovery_offset properly. */ | ||
| 4963 | if (mddev->major_version == 0 && | ||
| 4964 | mddev->minor_version > 90) | ||
| 4965 | rdev->recovery_offset = reshape_offset; | ||
| 4966 | |||
| 4967 | printk("%d: w=%d pa=%d pr=%d m=%d a=%d r=%d op1=%d op2=%d\n", | ||
| 4968 | rdev->raid_disk, working_disks, conf->prev_algo, | ||
| 4969 | conf->previous_raid_disks, conf->max_degraded, | ||
| 4970 | conf->algorithm, conf->raid_disks, | ||
| 4971 | only_parity(rdev->raid_disk, | ||
| 4972 | conf->prev_algo, | ||
| 4973 | conf->previous_raid_disks, | ||
| 4974 | conf->max_degraded), | ||
| 4975 | only_parity(rdev->raid_disk, | ||
| 4976 | conf->algorithm, | ||
| 4977 | conf->raid_disks, | ||
| 4978 | conf->max_degraded)); | ||
| 4979 | if (rdev->recovery_offset < reshape_offset) { | ||
| 4980 | /* We need to check old and new layout */ | ||
| 4981 | if (!only_parity(rdev->raid_disk, | ||
| 4982 | conf->algorithm, | ||
| 4983 | conf->raid_disks, | ||
| 4984 | conf->max_degraded)) | ||
| 4985 | continue; | ||
| 4986 | } | ||
| 4987 | if (!only_parity(rdev->raid_disk, | ||
| 4988 | conf->prev_algo, | ||
| 4989 | conf->previous_raid_disks, | ||
| 4990 | conf->max_degraded)) | ||
| 4991 | continue; | ||
| 4992 | dirty_parity_disks++; | ||
| 4993 | } | ||
| 4921 | 4994 | ||
| 4922 | mddev->degraded = (max(conf->raid_disks, conf->previous_raid_disks) | 4995 | mddev->degraded = (max(conf->raid_disks, conf->previous_raid_disks) |
| 4923 | - working_disks); | 4996 | - working_disks); |
| @@ -4933,7 +5006,7 @@ static int run(mddev_t *mddev) | |||
| 4933 | mddev->dev_sectors &= ~(mddev->chunk_sectors - 1); | 5006 | mddev->dev_sectors &= ~(mddev->chunk_sectors - 1); |
| 4934 | mddev->resync_max_sectors = mddev->dev_sectors; | 5007 | mddev->resync_max_sectors = mddev->dev_sectors; |
| 4935 | 5008 | ||
| 4936 | if (mddev->degraded > 0 && | 5009 | if (mddev->degraded > dirty_parity_disks && |
| 4937 | mddev->recovery_cp != MaxSector) { | 5010 | mddev->recovery_cp != MaxSector) { |
| 4938 | if (mddev->ok_start_degraded) | 5011 | if (mddev->ok_start_degraded) |
| 4939 | printk(KERN_WARNING | 5012 | printk(KERN_WARNING |
| @@ -5359,9 +5432,11 @@ static int raid5_start_reshape(mddev_t *mddev) | |||
| 5359 | !test_bit(Faulty, &rdev->flags)) { | 5432 | !test_bit(Faulty, &rdev->flags)) { |
| 5360 | if (raid5_add_disk(mddev, rdev) == 0) { | 5433 | if (raid5_add_disk(mddev, rdev) == 0) { |
| 5361 | char nm[20]; | 5434 | char nm[20]; |
| 5362 | set_bit(In_sync, &rdev->flags); | 5435 | if (rdev->raid_disk >= conf->previous_raid_disks) |
| 5436 | set_bit(In_sync, &rdev->flags); | ||
| 5437 | else | ||
| 5438 | rdev->recovery_offset = 0; | ||
| 5363 | added_devices++; | 5439 | added_devices++; |
| 5364 | rdev->recovery_offset = 0; | ||
| 5365 | sprintf(nm, "rd%d", rdev->raid_disk); | 5440 | sprintf(nm, "rd%d", rdev->raid_disk); |
| 5366 | if (sysfs_create_link(&mddev->kobj, | 5441 | if (sysfs_create_link(&mddev->kobj, |
| 5367 | &rdev->kobj, nm)) | 5442 | &rdev->kobj, nm)) |
