diff options
author | NeilBrown <neilb@cse.unsw.edu.au> | 2005-06-21 20:17:25 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-06-21 22:07:46 -0400 |
commit | 41158c7eb22312cfaa256744e1553bb4042ff085 (patch) | |
tree | 21c28e0630d66fc32d758993299a78088a846562 | |
parent | 289e99e8ed8f36e386bf7de49947311c17ae1482 (diff) |
[PATCH] md: optimise reconstruction when re-adding a recently failed drive.
When an array is degraded, bits in the intent-bitmap are never cleared. So if
a recently failed drive is re-added, we only need to reconstruct the blocks
that are still reflected in the bitmap.
This patch adds support for this re-adding.
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | drivers/md/md.c | 71 | ||||
-rw-r--r-- | drivers/md/raid1.c | 7 | ||||
-rw-r--r-- | include/linux/raid/md_k.h | 4 |
3 files changed, 65 insertions, 17 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index b02f8d1d77e7..789b114f860a 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -577,6 +577,8 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
577 | mdp_disk_t *desc; | 577 | mdp_disk_t *desc; |
578 | mdp_super_t *sb = (mdp_super_t *)page_address(rdev->sb_page); | 578 | mdp_super_t *sb = (mdp_super_t *)page_address(rdev->sb_page); |
579 | 579 | ||
580 | rdev->raid_disk = -1; | ||
581 | rdev->in_sync = 0; | ||
580 | if (mddev->raid_disks == 0) { | 582 | if (mddev->raid_disks == 0) { |
581 | mddev->major_version = 0; | 583 | mddev->major_version = 0; |
582 | mddev->minor_version = sb->minor_version; | 584 | mddev->minor_version = sb->minor_version; |
@@ -607,16 +609,24 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
607 | memcpy(mddev->uuid+12,&sb->set_uuid3, 4); | 609 | memcpy(mddev->uuid+12,&sb->set_uuid3, 4); |
608 | 610 | ||
609 | mddev->max_disks = MD_SB_DISKS; | 611 | mddev->max_disks = MD_SB_DISKS; |
610 | } else { | 612 | } else if (mddev->pers == NULL) { |
611 | __u64 ev1; | 613 | /* Insist on good event counter while assembling */ |
612 | ev1 = md_event(sb); | 614 | __u64 ev1 = md_event(sb); |
613 | ++ev1; | 615 | ++ev1; |
614 | if (ev1 < mddev->events) | 616 | if (ev1 < mddev->events) |
615 | return -EINVAL; | 617 | return -EINVAL; |
616 | } | 618 | } else if (mddev->bitmap) { |
619 | /* if adding to array with a bitmap, then we can accept an | ||
620 | * older device ... but not too old. | ||
621 | */ | ||
622 | __u64 ev1 = md_event(sb); | ||
623 | if (ev1 < mddev->bitmap->events_cleared) | ||
624 | return 0; | ||
625 | } else /* just a hot-add of a new device, leave raid_disk at -1 */ | ||
626 | return 0; | ||
627 | |||
617 | if (mddev->level != LEVEL_MULTIPATH) { | 628 | if (mddev->level != LEVEL_MULTIPATH) { |
618 | rdev->raid_disk = -1; | 629 | rdev->faulty = 0; |
619 | rdev->in_sync = rdev->faulty = 0; | ||
620 | desc = sb->disks + rdev->desc_nr; | 630 | desc = sb->disks + rdev->desc_nr; |
621 | 631 | ||
622 | if (desc->state & (1<<MD_DISK_FAULTY)) | 632 | if (desc->state & (1<<MD_DISK_FAULTY)) |
@@ -626,7 +636,8 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
626 | rdev->in_sync = 1; | 636 | rdev->in_sync = 1; |
627 | rdev->raid_disk = desc->raid_disk; | 637 | rdev->raid_disk = desc->raid_disk; |
628 | } | 638 | } |
629 | } | 639 | } else /* MULTIPATH are always insync */ |
640 | rdev->in_sync = 1; | ||
630 | return 0; | 641 | return 0; |
631 | } | 642 | } |
632 | 643 | ||
@@ -868,6 +879,8 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
868 | { | 879 | { |
869 | struct mdp_superblock_1 *sb = (struct mdp_superblock_1*)page_address(rdev->sb_page); | 880 | struct mdp_superblock_1 *sb = (struct mdp_superblock_1*)page_address(rdev->sb_page); |
870 | 881 | ||
882 | rdev->raid_disk = -1; | ||
883 | rdev->in_sync = 0; | ||
871 | if (mddev->raid_disks == 0) { | 884 | if (mddev->raid_disks == 0) { |
872 | mddev->major_version = 1; | 885 | mddev->major_version = 1; |
873 | mddev->patch_version = 0; | 886 | mddev->patch_version = 0; |
@@ -885,13 +898,21 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
885 | memcpy(mddev->uuid, sb->set_uuid, 16); | 898 | memcpy(mddev->uuid, sb->set_uuid, 16); |
886 | 899 | ||
887 | mddev->max_disks = (4096-256)/2; | 900 | mddev->max_disks = (4096-256)/2; |
888 | } else { | 901 | } else if (mddev->pers == NULL) { |
889 | __u64 ev1; | 902 | /* Insist of good event counter while assembling */ |
890 | ev1 = le64_to_cpu(sb->events); | 903 | __u64 ev1 = le64_to_cpu(sb->events); |
891 | ++ev1; | 904 | ++ev1; |
892 | if (ev1 < mddev->events) | 905 | if (ev1 < mddev->events) |
893 | return -EINVAL; | 906 | return -EINVAL; |
894 | } | 907 | } else if (mddev->bitmap) { |
908 | /* If adding to array with a bitmap, then we can accept an | ||
909 | * older device, but not too old. | ||
910 | */ | ||
911 | __u64 ev1 = le64_to_cpu(sb->events); | ||
912 | if (ev1 < mddev->bitmap->events_cleared) | ||
913 | return 0; | ||
914 | } else /* just a hot-add of a new device, leave raid_disk at -1 */ | ||
915 | return 0; | ||
895 | 916 | ||
896 | if (mddev->level != LEVEL_MULTIPATH) { | 917 | if (mddev->level != LEVEL_MULTIPATH) { |
897 | int role; | 918 | int role; |
@@ -899,14 +920,10 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
899 | role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]); | 920 | role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]); |
900 | switch(role) { | 921 | switch(role) { |
901 | case 0xffff: /* spare */ | 922 | case 0xffff: /* spare */ |
902 | rdev->in_sync = 0; | ||
903 | rdev->faulty = 0; | 923 | rdev->faulty = 0; |
904 | rdev->raid_disk = -1; | ||
905 | break; | 924 | break; |
906 | case 0xfffe: /* faulty */ | 925 | case 0xfffe: /* faulty */ |
907 | rdev->in_sync = 0; | ||
908 | rdev->faulty = 1; | 926 | rdev->faulty = 1; |
909 | rdev->raid_disk = -1; | ||
910 | break; | 927 | break; |
911 | default: | 928 | default: |
912 | rdev->in_sync = 1; | 929 | rdev->in_sync = 1; |
@@ -914,7 +931,9 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
914 | rdev->raid_disk = role; | 931 | rdev->raid_disk = role; |
915 | break; | 932 | break; |
916 | } | 933 | } |
917 | } | 934 | } else /* MULTIPATH are always insync */ |
935 | rdev->in_sync = 1; | ||
936 | |||
918 | return 0; | 937 | return 0; |
919 | } | 938 | } |
920 | 939 | ||
@@ -2155,6 +2174,18 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) | |||
2155 | PTR_ERR(rdev)); | 2174 | PTR_ERR(rdev)); |
2156 | return PTR_ERR(rdev); | 2175 | return PTR_ERR(rdev); |
2157 | } | 2176 | } |
2177 | /* set save_raid_disk if appropriate */ | ||
2178 | if (!mddev->persistent) { | ||
2179 | if (info->state & (1<<MD_DISK_SYNC) && | ||
2180 | info->raid_disk < mddev->raid_disks) | ||
2181 | rdev->raid_disk = info->raid_disk; | ||
2182 | else | ||
2183 | rdev->raid_disk = -1; | ||
2184 | } else | ||
2185 | super_types[mddev->major_version]. | ||
2186 | validate_super(mddev, rdev); | ||
2187 | rdev->saved_raid_disk = rdev->raid_disk; | ||
2188 | |||
2158 | rdev->in_sync = 0; /* just to be sure */ | 2189 | rdev->in_sync = 0; /* just to be sure */ |
2159 | rdev->raid_disk = -1; | 2190 | rdev->raid_disk = -1; |
2160 | err = bind_rdev_to_array(rdev, mddev); | 2191 | err = bind_rdev_to_array(rdev, mddev); |
@@ -3706,6 +3737,14 @@ void md_check_recovery(mddev_t *mddev) | |||
3706 | mddev->pers->spare_active(mddev); | 3737 | mddev->pers->spare_active(mddev); |
3707 | } | 3738 | } |
3708 | md_update_sb(mddev); | 3739 | md_update_sb(mddev); |
3740 | |||
3741 | /* if array is no-longer degraded, then any saved_raid_disk | ||
3742 | * information must be scrapped | ||
3743 | */ | ||
3744 | if (!mddev->degraded) | ||
3745 | ITERATE_RDEV(mddev,rdev,rtmp) | ||
3746 | rdev->saved_raid_disk = -1; | ||
3747 | |||
3709 | mddev->recovery = 0; | 3748 | mddev->recovery = 0; |
3710 | /* flag recovery needed just to double check */ | 3749 | /* flag recovery needed just to double check */ |
3711 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 3750 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index c3b4772cfaea..3f5234fe3593 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
@@ -811,9 +811,12 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) | |||
811 | { | 811 | { |
812 | conf_t *conf = mddev->private; | 812 | conf_t *conf = mddev->private; |
813 | int found = 0; | 813 | int found = 0; |
814 | int mirror; | 814 | int mirror = 0; |
815 | mirror_info_t *p; | 815 | mirror_info_t *p; |
816 | 816 | ||
817 | if (rdev->saved_raid_disk >= 0 && | ||
818 | conf->mirrors[rdev->saved_raid_disk].rdev == NULL) | ||
819 | mirror = rdev->saved_raid_disk; | ||
817 | for (mirror=0; mirror < mddev->raid_disks; mirror++) | 820 | for (mirror=0; mirror < mddev->raid_disks; mirror++) |
818 | if ( !(p=conf->mirrors+mirror)->rdev) { | 821 | if ( !(p=conf->mirrors+mirror)->rdev) { |
819 | 822 | ||
@@ -830,6 +833,8 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) | |||
830 | p->head_position = 0; | 833 | p->head_position = 0; |
831 | rdev->raid_disk = mirror; | 834 | rdev->raid_disk = mirror; |
832 | found = 1; | 835 | found = 1; |
836 | if (rdev->saved_raid_disk != mirror) | ||
837 | conf->fullsync = 1; | ||
833 | p->rdev = rdev; | 838 | p->rdev = rdev; |
834 | break; | 839 | break; |
835 | } | 840 | } |
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 16e94a9f0f8c..6cdcb4434c6c 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h | |||
@@ -183,6 +183,10 @@ struct mdk_rdev_s | |||
183 | 183 | ||
184 | int desc_nr; /* descriptor index in the superblock */ | 184 | int desc_nr; /* descriptor index in the superblock */ |
185 | int raid_disk; /* role of device in array */ | 185 | int raid_disk; /* role of device in array */ |
186 | int saved_raid_disk; /* role that device used to have in the | ||
187 | * array and could again if we did a partial | ||
188 | * resync from the bitmap | ||
189 | */ | ||
186 | 190 | ||
187 | atomic_t nr_pending; /* number of pending requests. | 191 | atomic_t nr_pending; /* number of pending requests. |
188 | * only maintained for arrays that | 192 | * only maintained for arrays that |