aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNeilBrown <neilb@cse.unsw.edu.au>2005-06-21 20:17:25 -0400
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-06-21 22:07:46 -0400
commit41158c7eb22312cfaa256744e1553bb4042ff085 (patch)
tree21c28e0630d66fc32d758993299a78088a846562
parent289e99e8ed8f36e386bf7de49947311c17ae1482 (diff)
[PATCH] md: optimise reconstruction when re-adding a recently failed drive.
When an array is degraded, bits in the intent-bitmap are never cleared. So if a recently failed drive is re-added, we only need to reconstruct the blocks that are still reflected in the bitmap. This patch adds support for this re-adding. Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--drivers/md/md.c71
-rw-r--r--drivers/md/raid1.c7
-rw-r--r--include/linux/raid/md_k.h4
3 files changed, 65 insertions, 17 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index b02f8d1d77e7..789b114f860a 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -577,6 +577,8 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
577 mdp_disk_t *desc; 577 mdp_disk_t *desc;
578 mdp_super_t *sb = (mdp_super_t *)page_address(rdev->sb_page); 578 mdp_super_t *sb = (mdp_super_t *)page_address(rdev->sb_page);
579 579
580 rdev->raid_disk = -1;
581 rdev->in_sync = 0;
580 if (mddev->raid_disks == 0) { 582 if (mddev->raid_disks == 0) {
581 mddev->major_version = 0; 583 mddev->major_version = 0;
582 mddev->minor_version = sb->minor_version; 584 mddev->minor_version = sb->minor_version;
@@ -607,16 +609,24 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
607 memcpy(mddev->uuid+12,&sb->set_uuid3, 4); 609 memcpy(mddev->uuid+12,&sb->set_uuid3, 4);
608 610
609 mddev->max_disks = MD_SB_DISKS; 611 mddev->max_disks = MD_SB_DISKS;
610 } else { 612 } else if (mddev->pers == NULL) {
611 __u64 ev1; 613 /* Insist on good event counter while assembling */
612 ev1 = md_event(sb); 614 __u64 ev1 = md_event(sb);
613 ++ev1; 615 ++ev1;
614 if (ev1 < mddev->events) 616 if (ev1 < mddev->events)
615 return -EINVAL; 617 return -EINVAL;
616 } 618 } else if (mddev->bitmap) {
619 /* if adding to array with a bitmap, then we can accept an
620 * older device ... but not too old.
621 */
622 __u64 ev1 = md_event(sb);
623 if (ev1 < mddev->bitmap->events_cleared)
624 return 0;
625 } else /* just a hot-add of a new device, leave raid_disk at -1 */
626 return 0;
627
617 if (mddev->level != LEVEL_MULTIPATH) { 628 if (mddev->level != LEVEL_MULTIPATH) {
618 rdev->raid_disk = -1; 629 rdev->faulty = 0;
619 rdev->in_sync = rdev->faulty = 0;
620 desc = sb->disks + rdev->desc_nr; 630 desc = sb->disks + rdev->desc_nr;
621 631
622 if (desc->state & (1<<MD_DISK_FAULTY)) 632 if (desc->state & (1<<MD_DISK_FAULTY))
@@ -626,7 +636,8 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
626 rdev->in_sync = 1; 636 rdev->in_sync = 1;
627 rdev->raid_disk = desc->raid_disk; 637 rdev->raid_disk = desc->raid_disk;
628 } 638 }
629 } 639 } else /* MULTIPATH are always insync */
640 rdev->in_sync = 1;
630 return 0; 641 return 0;
631} 642}
632 643
@@ -868,6 +879,8 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
868{ 879{
869 struct mdp_superblock_1 *sb = (struct mdp_superblock_1*)page_address(rdev->sb_page); 880 struct mdp_superblock_1 *sb = (struct mdp_superblock_1*)page_address(rdev->sb_page);
870 881
882 rdev->raid_disk = -1;
883 rdev->in_sync = 0;
871 if (mddev->raid_disks == 0) { 884 if (mddev->raid_disks == 0) {
872 mddev->major_version = 1; 885 mddev->major_version = 1;
873 mddev->patch_version = 0; 886 mddev->patch_version = 0;
@@ -885,13 +898,21 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
885 memcpy(mddev->uuid, sb->set_uuid, 16); 898 memcpy(mddev->uuid, sb->set_uuid, 16);
886 899
887 mddev->max_disks = (4096-256)/2; 900 mddev->max_disks = (4096-256)/2;
888 } else { 901 } else if (mddev->pers == NULL) {
889 __u64 ev1; 902 /* Insist of good event counter while assembling */
890 ev1 = le64_to_cpu(sb->events); 903 __u64 ev1 = le64_to_cpu(sb->events);
891 ++ev1; 904 ++ev1;
892 if (ev1 < mddev->events) 905 if (ev1 < mddev->events)
893 return -EINVAL; 906 return -EINVAL;
894 } 907 } else if (mddev->bitmap) {
908 /* If adding to array with a bitmap, then we can accept an
909 * older device, but not too old.
910 */
911 __u64 ev1 = le64_to_cpu(sb->events);
912 if (ev1 < mddev->bitmap->events_cleared)
913 return 0;
914 } else /* just a hot-add of a new device, leave raid_disk at -1 */
915 return 0;
895 916
896 if (mddev->level != LEVEL_MULTIPATH) { 917 if (mddev->level != LEVEL_MULTIPATH) {
897 int role; 918 int role;
@@ -899,14 +920,10 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
899 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]); 920 role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
900 switch(role) { 921 switch(role) {
901 case 0xffff: /* spare */ 922 case 0xffff: /* spare */
902 rdev->in_sync = 0;
903 rdev->faulty = 0; 923 rdev->faulty = 0;
904 rdev->raid_disk = -1;
905 break; 924 break;
906 case 0xfffe: /* faulty */ 925 case 0xfffe: /* faulty */
907 rdev->in_sync = 0;
908 rdev->faulty = 1; 926 rdev->faulty = 1;
909 rdev->raid_disk = -1;
910 break; 927 break;
911 default: 928 default:
912 rdev->in_sync = 1; 929 rdev->in_sync = 1;
@@ -914,7 +931,9 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
914 rdev->raid_disk = role; 931 rdev->raid_disk = role;
915 break; 932 break;
916 } 933 }
917 } 934 } else /* MULTIPATH are always insync */
935 rdev->in_sync = 1;
936
918 return 0; 937 return 0;
919} 938}
920 939
@@ -2155,6 +2174,18 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
2155 PTR_ERR(rdev)); 2174 PTR_ERR(rdev));
2156 return PTR_ERR(rdev); 2175 return PTR_ERR(rdev);
2157 } 2176 }
2177 /* set save_raid_disk if appropriate */
2178 if (!mddev->persistent) {
2179 if (info->state & (1<<MD_DISK_SYNC) &&
2180 info->raid_disk < mddev->raid_disks)
2181 rdev->raid_disk = info->raid_disk;
2182 else
2183 rdev->raid_disk = -1;
2184 } else
2185 super_types[mddev->major_version].
2186 validate_super(mddev, rdev);
2187 rdev->saved_raid_disk = rdev->raid_disk;
2188
2158 rdev->in_sync = 0; /* just to be sure */ 2189 rdev->in_sync = 0; /* just to be sure */
2159 rdev->raid_disk = -1; 2190 rdev->raid_disk = -1;
2160 err = bind_rdev_to_array(rdev, mddev); 2191 err = bind_rdev_to_array(rdev, mddev);
@@ -3706,6 +3737,14 @@ void md_check_recovery(mddev_t *mddev)
3706 mddev->pers->spare_active(mddev); 3737 mddev->pers->spare_active(mddev);
3707 } 3738 }
3708 md_update_sb(mddev); 3739 md_update_sb(mddev);
3740
3741 /* if array is no-longer degraded, then any saved_raid_disk
3742 * information must be scrapped
3743 */
3744 if (!mddev->degraded)
3745 ITERATE_RDEV(mddev,rdev,rtmp)
3746 rdev->saved_raid_disk = -1;
3747
3709 mddev->recovery = 0; 3748 mddev->recovery = 0;
3710 /* flag recovery needed just to double check */ 3749 /* flag recovery needed just to double check */
3711 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 3750 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index c3b4772cfaea..3f5234fe3593 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -811,9 +811,12 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
811{ 811{
812 conf_t *conf = mddev->private; 812 conf_t *conf = mddev->private;
813 int found = 0; 813 int found = 0;
814 int mirror; 814 int mirror = 0;
815 mirror_info_t *p; 815 mirror_info_t *p;
816 816
817 if (rdev->saved_raid_disk >= 0 &&
818 conf->mirrors[rdev->saved_raid_disk].rdev == NULL)
819 mirror = rdev->saved_raid_disk;
817 for (mirror=0; mirror < mddev->raid_disks; mirror++) 820 for (mirror=0; mirror < mddev->raid_disks; mirror++)
818 if ( !(p=conf->mirrors+mirror)->rdev) { 821 if ( !(p=conf->mirrors+mirror)->rdev) {
819 822
@@ -830,6 +833,8 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
830 p->head_position = 0; 833 p->head_position = 0;
831 rdev->raid_disk = mirror; 834 rdev->raid_disk = mirror;
832 found = 1; 835 found = 1;
836 if (rdev->saved_raid_disk != mirror)
837 conf->fullsync = 1;
833 p->rdev = rdev; 838 p->rdev = rdev;
834 break; 839 break;
835 } 840 }
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index 16e94a9f0f8c..6cdcb4434c6c 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -183,6 +183,10 @@ struct mdk_rdev_s
183 183
184 int desc_nr; /* descriptor index in the superblock */ 184 int desc_nr; /* descriptor index in the superblock */
185 int raid_disk; /* role of device in array */ 185 int raid_disk; /* role of device in array */
186 int saved_raid_disk; /* role that device used to have in the
187 * array and could again if we did a partial
188 * resync from the bitmap
189 */
186 190
187 atomic_t nr_pending; /* number of pending requests. 191 atomic_t nr_pending; /* number of pending requests.
188 * only maintained for arrays that 192 * only maintained for arrays that