aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md')
-rw-r--r-- drivers/md/md.c 46
-rw-r--r-- drivers/md/raid1.c 7
-rw-r--r-- drivers/md/raid5.c 87
3 files changed, 121 insertions, 19 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 10eb1fce975e..b182f86a19dd 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -944,6 +944,14 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
944 desc->raid_disk < mddev->raid_disks */) { 944 desc->raid_disk < mddev->raid_disks */) {
945 set_bit(In_sync, &rdev->flags); 945 set_bit(In_sync, &rdev->flags);
946 rdev->raid_disk = desc->raid_disk; 946 rdev->raid_disk = desc->raid_disk;
947 } else if (desc->state & (1<<MD_DISK_ACTIVE)) {
948 /* active but not in sync implies recovery up to
949 * reshape position. We don't know exactly where
950 * that is, so set to zero for now */
951 if (mddev->minor_version >= 91) {
952 rdev->recovery_offset = 0;
953 rdev->raid_disk = desc->raid_disk;
954 }
947 } 955 }
948 if (desc->state & (1<<MD_DISK_WRITEMOSTLY)) 956 if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
949 set_bit(WriteMostly, &rdev->flags); 957 set_bit(WriteMostly, &rdev->flags);
@@ -1032,8 +1040,19 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
1032 list_for_each_entry(rdev2, &mddev->disks, same_set) { 1040 list_for_each_entry(rdev2, &mddev->disks, same_set) {
1033 mdp_disk_t *d; 1041 mdp_disk_t *d;
1034 int desc_nr; 1042 int desc_nr;
1035 if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags) 1043 int is_active = test_bit(In_sync, &rdev2->flags);
1036 && !test_bit(Faulty, &rdev2->flags)) 1044
1045 if (rdev2->raid_disk >= 0 &&
1046 sb->minor_version >= 91)
1047 /* we have nowhere to store the recovery_offset,
1048 * but if it is not below the reshape_position,
1049 * we can piggy-back on that.
1050 */
1051 is_active = 1;
1052 if (rdev2->raid_disk < 0 ||
1053 test_bit(Faulty, &rdev2->flags))
1054 is_active = 0;
1055 if (is_active)
1037 desc_nr = rdev2->raid_disk; 1056 desc_nr = rdev2->raid_disk;
1038 else 1057 else
1039 desc_nr = next_spare++; 1058 desc_nr = next_spare++;
@@ -1043,16 +1062,16 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
1043 d->number = rdev2->desc_nr; 1062 d->number = rdev2->desc_nr;
1044 d->major = MAJOR(rdev2->bdev->bd_dev); 1063 d->major = MAJOR(rdev2->bdev->bd_dev);
1045 d->minor = MINOR(rdev2->bdev->bd_dev); 1064 d->minor = MINOR(rdev2->bdev->bd_dev);
1046 if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags) 1065 if (is_active)
1047 && !test_bit(Faulty, &rdev2->flags))
1048 d->raid_disk = rdev2->raid_disk; 1066 d->raid_disk = rdev2->raid_disk;
1049 else 1067 else
1050 d->raid_disk = rdev2->desc_nr; /* compatibility */ 1068 d->raid_disk = rdev2->desc_nr; /* compatibility */
1051 if (test_bit(Faulty, &rdev2->flags)) 1069 if (test_bit(Faulty, &rdev2->flags))
1052 d->state = (1<<MD_DISK_FAULTY); 1070 d->state = (1<<MD_DISK_FAULTY);
1053 else if (test_bit(In_sync, &rdev2->flags)) { 1071 else if (is_active) {
1054 d->state = (1<<MD_DISK_ACTIVE); 1072 d->state = (1<<MD_DISK_ACTIVE);
1055 d->state |= (1<<MD_DISK_SYNC); 1073 if (test_bit(In_sync, &rdev2->flags))
1074 d->state |= (1<<MD_DISK_SYNC);
1056 active++; 1075 active++;
1057 working++; 1076 working++;
1058 } else { 1077 } else {
@@ -1382,8 +1401,6 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
1382 1401
1383 if (rdev->raid_disk >= 0 && 1402 if (rdev->raid_disk >= 0 &&
1384 !test_bit(In_sync, &rdev->flags)) { 1403 !test_bit(In_sync, &rdev->flags)) {
1385 if (mddev->curr_resync_completed > rdev->recovery_offset)
1386 rdev->recovery_offset = mddev->curr_resync_completed;
1387 if (rdev->recovery_offset > 0) { 1404 if (rdev->recovery_offset > 0) {
1388 sb->feature_map |= 1405 sb->feature_map |=
1389 cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET); 1406 cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
@@ -1917,6 +1934,14 @@ static void sync_sbs(mddev_t * mddev, int nospares)
1917 */ 1934 */
1918 mdk_rdev_t *rdev; 1935 mdk_rdev_t *rdev;
1919 1936
1937 /* First make sure individual recovery_offsets are correct */
1938 list_for_each_entry(rdev, &mddev->disks, same_set) {
1939 if (rdev->raid_disk >= 0 &&
1940 !test_bit(In_sync, &rdev->flags) &&
1941 mddev->curr_resync_completed > rdev->recovery_offset)
1942 rdev->recovery_offset = mddev->curr_resync_completed;
1943
1944 }
1920 list_for_each_entry(rdev, &mddev->disks, same_set) { 1945 list_for_each_entry(rdev, &mddev->disks, same_set) {
1921 if (rdev->sb_events == mddev->events || 1946 if (rdev->sb_events == mddev->events ||
1922 (nospares && 1947 (nospares &&
@@ -6504,8 +6529,9 @@ void md_do_sync(mddev_t *mddev)
6504 skip: 6529 skip:
6505 mddev->curr_resync = 0; 6530 mddev->curr_resync = 0;
6506 mddev->curr_resync_completed = 0; 6531 mddev->curr_resync_completed = 0;
6507 mddev->resync_min = 0; 6532 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
6508 mddev->resync_max = MaxSector; 6533 /* We completed so max setting can be forgotten. */
6534 mddev->resync_max = MaxSector;
6509 sysfs_notify(&mddev->kobj, NULL, "sync_completed"); 6535 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
6510 wake_up(&resync_wait); 6536 wake_up(&resync_wait);
6511 set_bit(MD_RECOVERY_DONE, &mddev->recovery); 6537 set_bit(MD_RECOVERY_DONE, &mddev->recovery);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index a053423785c9..e07ce2e033a9 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1650,11 +1650,12 @@ static void raid1d(mddev_t *mddev)
1650 r1_bio->sector, 1650 r1_bio->sector,
1651 r1_bio->sectors); 1651 r1_bio->sectors);
1652 unfreeze_array(conf); 1652 unfreeze_array(conf);
1653 } 1653 } else
1654 md_error(mddev,
1655 conf->mirrors[r1_bio->read_disk].rdev);
1654 1656
1655 bio = r1_bio->bios[r1_bio->read_disk]; 1657 bio = r1_bio->bios[r1_bio->read_disk];
1656 if ((disk=read_balance(conf, r1_bio)) == -1 || 1658 if ((disk=read_balance(conf, r1_bio)) == -1) {
1657 disk == r1_bio->read_disk) {
1658 printk(KERN_ALERT "raid1: %s: unrecoverable I/O" 1659 printk(KERN_ALERT "raid1: %s: unrecoverable I/O"
1659 " read error for block %llu\n", 1660 " read error for block %llu\n",
1660 bdevname(bio->bi_bdev,b), 1661 bdevname(bio->bi_bdev,b),
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 81abefc172d9..d29215d966da 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -4049,6 +4049,8 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
4049 sector_nr = conf->reshape_progress; 4049 sector_nr = conf->reshape_progress;
4050 sector_div(sector_nr, new_data_disks); 4050 sector_div(sector_nr, new_data_disks);
4051 if (sector_nr) { 4051 if (sector_nr) {
4052 mddev->curr_resync_completed = sector_nr;
4053 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
4052 *skipped = 1; 4054 *skipped = 1;
4053 return sector_nr; 4055 return sector_nr;
4054 } 4056 }
@@ -4821,11 +4823,40 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
4821 return ERR_PTR(-ENOMEM); 4823 return ERR_PTR(-ENOMEM);
4822} 4824}
4823 4825
4826
/*
 * only_parity - report whether a device slot is one of the fixed positions
 * tested for the given layout.
 *
 * @raid_disk:    slot index of the device being examined
 * @algo:         layout constant (ALGORITHM_*) selecting which test applies
 * @raid_disks:   number of slots in the layout being tested
 * @max_degraded: number of leading/trailing slots tested for PARITY_0/PARITY_N
 *
 * Returns 1 when raid_disk matches the position test for algo, 0 otherwise
 * (including for any algo value not listed in the switch).
 *
 * NOTE(review): judging by the name and by the caller in run(), a return of 1
 * is taken to mean the slot holds nothing but parity in that layout; the
 * layout definitions themselves are not visible here -- confirm against them.
 */
4827static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded)
4828{
4829 switch (algo) {
4830 case ALGORITHM_PARITY_0:
4831 if (raid_disk < max_degraded) /* one of the first max_degraded slots */
4832 return 1;
4833 break;
4834 case ALGORITHM_PARITY_N:
4835 if (raid_disk >= raid_disks - max_degraded) /* one of the last max_degraded slots */
4836 return 1;
4837 break;
4838 case ALGORITHM_PARITY_0_6:
4839 if (raid_disk == 0 || /* first slot ... */
4840 raid_disk == raid_disks - 1) /* ... or last slot */
4841 return 1;
4842 break;
4843 case ALGORITHM_LEFT_ASYMMETRIC_6:
4844 case ALGORITHM_RIGHT_ASYMMETRIC_6:
4845 case ALGORITHM_LEFT_SYMMETRIC_6:
4846 case ALGORITHM_RIGHT_SYMMETRIC_6:
4847 if (raid_disk == raid_disks - 1) /* only the last slot matches for these four */
4848 return 1;
4849 } /* final case needs no break; every non-matching path lands on return 0 */
4850 return 0;
4851}
4852
4824static int run(mddev_t *mddev) 4853static int run(mddev_t *mddev)
4825{ 4854{
4826 raid5_conf_t *conf; 4855 raid5_conf_t *conf;
4827 int working_disks = 0, chunk_size; 4856 int working_disks = 0, chunk_size;
4857 int dirty_parity_disks = 0;
4828 mdk_rdev_t *rdev; 4858 mdk_rdev_t *rdev;
4859 sector_t reshape_offset = 0;
4829 4860
4830 if (mddev->recovery_cp != MaxSector) 4861 if (mddev->recovery_cp != MaxSector)
4831 printk(KERN_NOTICE "raid5: %s is not clean" 4862 printk(KERN_NOTICE "raid5: %s is not clean"
@@ -4859,6 +4890,7 @@ static int run(mddev_t *mddev)
4859 "on a stripe boundary\n"); 4890 "on a stripe boundary\n");
4860 return -EINVAL; 4891 return -EINVAL;
4861 } 4892 }
4893 reshape_offset = here_new * mddev->new_chunk_sectors;
4862 /* here_new is the stripe we will write to */ 4894 /* here_new is the stripe we will write to */
4863 here_old = mddev->reshape_position; 4895 here_old = mddev->reshape_position;
4864 sector_div(here_old, mddev->chunk_sectors * 4896 sector_div(here_old, mddev->chunk_sectors *
@@ -4914,10 +4946,51 @@ static int run(mddev_t *mddev)
4914 /* 4946 /*
4915 * 0 for a fully functional array, 1 or 2 for a degraded array. 4947 * 0 for a fully functional array, 1 or 2 for a degraded array.
4916 */ 4948 */
4917 list_for_each_entry(rdev, &mddev->disks, same_set) 4949 list_for_each_entry(rdev, &mddev->disks, same_set) {
4918 if (rdev->raid_disk >= 0 && 4950 if (rdev->raid_disk < 0)
4919 test_bit(In_sync, &rdev->flags)) 4951 continue;
4952 if (test_bit(In_sync, &rdev->flags))
4920 working_disks++; 4953 working_disks++;
4954 /* This disc is not fully in-sync. However if it
4955 * just stored parity (beyond the recovery_offset),
4956 * then we don't need to be concerned about the
4957 * array being dirty.
4958 * When reshape goes 'backwards', we never have
4959 * partially completed devices, so we only need
4960 * to worry about reshape going forwards.
4961 */
4962 /* Hack because v0.91 doesn't store recovery_offset properly. */
4963 if (mddev->major_version == 0 &&
4964 mddev->minor_version > 90)
4965 rdev->recovery_offset = reshape_offset;
4966
4967 printk("%d: w=%d pa=%d pr=%d m=%d a=%d r=%d op1=%d op2=%d\n",
4968 rdev->raid_disk, working_disks, conf->prev_algo,
4969 conf->previous_raid_disks, conf->max_degraded,
4970 conf->algorithm, conf->raid_disks,
4971 only_parity(rdev->raid_disk,
4972 conf->prev_algo,
4973 conf->previous_raid_disks,
4974 conf->max_degraded),
4975 only_parity(rdev->raid_disk,
4976 conf->algorithm,
4977 conf->raid_disks,
4978 conf->max_degraded));
4979 if (rdev->recovery_offset < reshape_offset) {
4980 /* We need to check old and new layout */
4981 if (!only_parity(rdev->raid_disk,
4982 conf->algorithm,
4983 conf->raid_disks,
4984 conf->max_degraded))
4985 continue;
4986 }
4987 if (!only_parity(rdev->raid_disk,
4988 conf->prev_algo,
4989 conf->previous_raid_disks,
4990 conf->max_degraded))
4991 continue;
4992 dirty_parity_disks++;
4993 }
4921 4994
4922 mddev->degraded = (max(conf->raid_disks, conf->previous_raid_disks) 4995 mddev->degraded = (max(conf->raid_disks, conf->previous_raid_disks)
4923 - working_disks); 4996 - working_disks);
@@ -4933,7 +5006,7 @@ static int run(mddev_t *mddev)
4933 mddev->dev_sectors &= ~(mddev->chunk_sectors - 1); 5006 mddev->dev_sectors &= ~(mddev->chunk_sectors - 1);
4934 mddev->resync_max_sectors = mddev->dev_sectors; 5007 mddev->resync_max_sectors = mddev->dev_sectors;
4935 5008
4936 if (mddev->degraded > 0 && 5009 if (mddev->degraded > dirty_parity_disks &&
4937 mddev->recovery_cp != MaxSector) { 5010 mddev->recovery_cp != MaxSector) {
4938 if (mddev->ok_start_degraded) 5011 if (mddev->ok_start_degraded)
4939 printk(KERN_WARNING 5012 printk(KERN_WARNING
@@ -5359,9 +5432,11 @@ static int raid5_start_reshape(mddev_t *mddev)
5359 !test_bit(Faulty, &rdev->flags)) { 5432 !test_bit(Faulty, &rdev->flags)) {
5360 if (raid5_add_disk(mddev, rdev) == 0) { 5433 if (raid5_add_disk(mddev, rdev) == 0) {
5361 char nm[20]; 5434 char nm[20];
5362 set_bit(In_sync, &rdev->flags); 5435 if (rdev->raid_disk >= conf->previous_raid_disks)
5436 set_bit(In_sync, &rdev->flags);
5437 else
5438 rdev->recovery_offset = 0;
5363 added_devices++; 5439 added_devices++;
5364 rdev->recovery_offset = 0;
5365 sprintf(nm, "rd%d", rdev->raid_disk); 5440 sprintf(nm, "rd%d", rdev->raid_disk);
5366 if (sysfs_create_link(&mddev->kobj, 5441 if (sysfs_create_link(&mddev->kobj,
5367 &rdev->kobj, nm)) 5442 &rdev->kobj, nm))