diff options
author | NeilBrown <neilb@suse.de> | 2006-06-26 03:27:40 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-06-26 12:58:37 -0400 |
commit | 5fd6c1dce06ec24ef3de20fe0c7ecf2ba9fe5ef9 (patch) | |
tree | bd7cc8c22615e8622d3d63b7fc68dcc3ac1964de /drivers/md/md.c | |
parent | a8a55c387da28d67d98f56e4f5021462cb61f7a6 (diff) |
[PATCH] md: allow checkpoint of recovery with version-1 superblock
For a while we have had checkpointing of resync. The version-1 superblock
allows recovery to be checkpointed as well, and this patch implements that.
Due to early carelessness we need to add a feature flag to signal that the
recovery_offset field is in use; otherwise, older kernels would assume that a
partially recovered array is in fact fully recovered.
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r-- | drivers/md/md.c | 115 |
1 files changed, 94 insertions, 21 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index 71d46eb2c438..a296edd7e1c3 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -1175,7 +1175,11 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1175 | set_bit(Faulty, &rdev->flags); | 1175 | set_bit(Faulty, &rdev->flags); |
1176 | break; | 1176 | break; |
1177 | default: | 1177 | default: |
1178 | set_bit(In_sync, &rdev->flags); | 1178 | if ((le32_to_cpu(sb->feature_map) & |
1179 | MD_FEATURE_RECOVERY_OFFSET)) | ||
1180 | rdev->recovery_offset = le64_to_cpu(sb->recovery_offset); | ||
1181 | else | ||
1182 | set_bit(In_sync, &rdev->flags); | ||
1179 | rdev->raid_disk = role; | 1183 | rdev->raid_disk = role; |
1180 | break; | 1184 | break; |
1181 | } | 1185 | } |
@@ -1199,6 +1203,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1199 | 1203 | ||
1200 | sb->feature_map = 0; | 1204 | sb->feature_map = 0; |
1201 | sb->pad0 = 0; | 1205 | sb->pad0 = 0; |
1206 | sb->recovery_offset = cpu_to_le64(0); | ||
1202 | memset(sb->pad1, 0, sizeof(sb->pad1)); | 1207 | memset(sb->pad1, 0, sizeof(sb->pad1)); |
1203 | memset(sb->pad2, 0, sizeof(sb->pad2)); | 1208 | memset(sb->pad2, 0, sizeof(sb->pad2)); |
1204 | memset(sb->pad3, 0, sizeof(sb->pad3)); | 1209 | memset(sb->pad3, 0, sizeof(sb->pad3)); |
@@ -1219,6 +1224,14 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1219 | sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset); | 1224 | sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset); |
1220 | sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET); | 1225 | sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET); |
1221 | } | 1226 | } |
1227 | |||
1228 | if (rdev->raid_disk >= 0 && | ||
1229 | !test_bit(In_sync, &rdev->flags) && | ||
1230 | rdev->recovery_offset > 0) { | ||
1231 | sb->feature_map |= cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET); | ||
1232 | sb->recovery_offset = cpu_to_le64(rdev->recovery_offset); | ||
1233 | } | ||
1234 | |||
1222 | if (mddev->reshape_position != MaxSector) { | 1235 | if (mddev->reshape_position != MaxSector) { |
1223 | sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE); | 1236 | sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE); |
1224 | sb->reshape_position = cpu_to_le64(mddev->reshape_position); | 1237 | sb->reshape_position = cpu_to_le64(mddev->reshape_position); |
@@ -1243,11 +1256,12 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1243 | sb->dev_roles[i] = cpu_to_le16(0xfffe); | 1256 | sb->dev_roles[i] = cpu_to_le16(0xfffe); |
1244 | else if (test_bit(In_sync, &rdev2->flags)) | 1257 | else if (test_bit(In_sync, &rdev2->flags)) |
1245 | sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk); | 1258 | sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk); |
1259 | else if (rdev2->raid_disk >= 0 && rdev2->recovery_offset > 0) | ||
1260 | sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk); | ||
1246 | else | 1261 | else |
1247 | sb->dev_roles[i] = cpu_to_le16(0xffff); | 1262 | sb->dev_roles[i] = cpu_to_le16(0xffff); |
1248 | } | 1263 | } |
1249 | 1264 | ||
1250 | sb->recovery_offset = cpu_to_le64(0); /* not supported yet */ | ||
1251 | sb->sb_csum = calc_sb_1_csum(sb); | 1265 | sb->sb_csum = calc_sb_1_csum(sb); |
1252 | } | 1266 | } |
1253 | 1267 | ||
@@ -2603,8 +2617,6 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data) | |||
2603 | return NULL; | 2617 | return NULL; |
2604 | } | 2618 | } |
2605 | 2619 | ||
2606 | void md_wakeup_thread(mdk_thread_t *thread); | ||
2607 | |||
2608 | static void md_safemode_timeout(unsigned long data) | 2620 | static void md_safemode_timeout(unsigned long data) |
2609 | { | 2621 | { |
2610 | mddev_t *mddev = (mddev_t *) data; | 2622 | mddev_t *mddev = (mddev_t *) data; |
@@ -2786,6 +2798,36 @@ static int do_md_run(mddev_t * mddev) | |||
2786 | mddev->queue->queuedata = mddev; | 2798 | mddev->queue->queuedata = mddev; |
2787 | mddev->queue->make_request_fn = mddev->pers->make_request; | 2799 | mddev->queue->make_request_fn = mddev->pers->make_request; |
2788 | 2800 | ||
2801 | /* If there is a partially-recovered drive we need to | ||
2802 | * start recovery here. If we leave it to md_check_recovery, | ||
2803 | * it will remove the drives and not do the right thing | ||
2804 | */ | ||
2805 | if (mddev->degraded) { | ||
2806 | struct list_head *rtmp; | ||
2807 | int spares = 0; | ||
2808 | ITERATE_RDEV(mddev,rdev,rtmp) | ||
2809 | if (rdev->raid_disk >= 0 && | ||
2810 | !test_bit(In_sync, &rdev->flags) && | ||
2811 | !test_bit(Faulty, &rdev->flags)) | ||
2812 | /* complete an interrupted recovery */ | ||
2813 | spares++; | ||
2814 | if (spares && mddev->pers->sync_request) { | ||
2815 | mddev->recovery = 0; | ||
2816 | set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); | ||
2817 | mddev->sync_thread = md_register_thread(md_do_sync, | ||
2818 | mddev, | ||
2819 | "%s_resync"); | ||
2820 | if (!mddev->sync_thread) { | ||
2821 | printk(KERN_ERR "%s: could not start resync" | ||
2822 | " thread...\n", | ||
2823 | mdname(mddev)); | ||
2824 | /* leave the spares where they are, it shouldn't hurt */ | ||
2825 | mddev->recovery = 0; | ||
2826 | } else | ||
2827 | md_wakeup_thread(mddev->sync_thread); | ||
2828 | } | ||
2829 | } | ||
2830 | |||
2789 | mddev->changed = 1; | 2831 | mddev->changed = 1; |
2790 | md_new_event(mddev); | 2832 | md_new_event(mddev); |
2791 | return 0; | 2833 | return 0; |
@@ -2819,6 +2861,7 @@ static int restart_array(mddev_t *mddev) | |||
2819 | */ | 2861 | */ |
2820 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 2862 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
2821 | md_wakeup_thread(mddev->thread); | 2863 | md_wakeup_thread(mddev->thread); |
2864 | md_wakeup_thread(mddev->sync_thread); | ||
2822 | err = 0; | 2865 | err = 0; |
2823 | } else { | 2866 | } else { |
2824 | printk(KERN_ERR "md: %s has no personality assigned.\n", | 2867 | printk(KERN_ERR "md: %s has no personality assigned.\n", |
@@ -2842,6 +2885,7 @@ static int do_md_stop(mddev_t * mddev, int ro) | |||
2842 | } | 2885 | } |
2843 | 2886 | ||
2844 | if (mddev->sync_thread) { | 2887 | if (mddev->sync_thread) { |
2888 | set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); | ||
2845 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); | 2889 | set_bit(MD_RECOVERY_INTR, &mddev->recovery); |
2846 | md_unregister_thread(mddev->sync_thread); | 2890 | md_unregister_thread(mddev->sync_thread); |
2847 | mddev->sync_thread = NULL; | 2891 | mddev->sync_thread = NULL; |
@@ -2871,13 +2915,14 @@ static int do_md_stop(mddev_t * mddev, int ro) | |||
2871 | if (mddev->ro) | 2915 | if (mddev->ro) |
2872 | mddev->ro = 0; | 2916 | mddev->ro = 0; |
2873 | } | 2917 | } |
2874 | if (!mddev->in_sync) { | 2918 | if (!mddev->in_sync || mddev->sb_dirty) { |
2875 | /* mark array as shutdown cleanly */ | 2919 | /* mark array as shutdown cleanly */ |
2876 | mddev->in_sync = 1; | 2920 | mddev->in_sync = 1; |
2877 | md_update_sb(mddev); | 2921 | md_update_sb(mddev); |
2878 | } | 2922 | } |
2879 | if (ro) | 2923 | if (ro) |
2880 | set_disk_ro(disk, 1); | 2924 | set_disk_ro(disk, 1); |
2925 | clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); | ||
2881 | } | 2926 | } |
2882 | 2927 | ||
2883 | /* | 2928 | /* |
@@ -4665,10 +4710,14 @@ void md_do_sync(mddev_t *mddev) | |||
4665 | struct list_head *tmp; | 4710 | struct list_head *tmp; |
4666 | sector_t last_check; | 4711 | sector_t last_check; |
4667 | int skipped = 0; | 4712 | int skipped = 0; |
4713 | struct list_head *rtmp; | ||
4714 | mdk_rdev_t *rdev; | ||
4668 | 4715 | ||
4669 | /* just incase thread restarts... */ | 4716 | /* just incase thread restarts... */ |
4670 | if (test_bit(MD_RECOVERY_DONE, &mddev->recovery)) | 4717 | if (test_bit(MD_RECOVERY_DONE, &mddev->recovery)) |
4671 | return; | 4718 | return; |
4719 | if (mddev->ro) /* never try to sync a read-only array */ | ||
4720 | return; | ||
4672 | 4721 | ||
4673 | /* we overload curr_resync somewhat here. | 4722 | /* we overload curr_resync somewhat here. |
4674 | * 0 == not engaged in resync at all | 4723 | * 0 == not engaged in resync at all |
@@ -4727,17 +4776,30 @@ void md_do_sync(mddev_t *mddev) | |||
4727 | } | 4776 | } |
4728 | } while (mddev->curr_resync < 2); | 4777 | } while (mddev->curr_resync < 2); |
4729 | 4778 | ||
4779 | j = 0; | ||
4730 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { | 4780 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { |
4731 | /* resync follows the size requested by the personality, | 4781 | /* resync follows the size requested by the personality, |
4732 | * which defaults to physical size, but can be virtual size | 4782 | * which defaults to physical size, but can be virtual size |
4733 | */ | 4783 | */ |
4734 | max_sectors = mddev->resync_max_sectors; | 4784 | max_sectors = mddev->resync_max_sectors; |
4735 | mddev->resync_mismatches = 0; | 4785 | mddev->resync_mismatches = 0; |
4786 | /* we don't use the checkpoint if there's a bitmap */ | ||
4787 | if (!mddev->bitmap && | ||
4788 | !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) | ||
4789 | j = mddev->recovery_cp; | ||
4736 | } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) | 4790 | } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) |
4737 | max_sectors = mddev->size << 1; | 4791 | max_sectors = mddev->size << 1; |
4738 | else | 4792 | else { |
4739 | /* recovery follows the physical size of devices */ | 4793 | /* recovery follows the physical size of devices */ |
4740 | max_sectors = mddev->size << 1; | 4794 | max_sectors = mddev->size << 1; |
4795 | j = MaxSector; | ||
4796 | ITERATE_RDEV(mddev,rdev,rtmp) | ||
4797 | if (rdev->raid_disk >= 0 && | ||
4798 | !test_bit(Faulty, &rdev->flags) && | ||
4799 | !test_bit(In_sync, &rdev->flags) && | ||
4800 | rdev->recovery_offset < j) | ||
4801 | j = rdev->recovery_offset; | ||
4802 | } | ||
4741 | 4803 | ||
4742 | printk(KERN_INFO "md: syncing RAID array %s\n", mdname(mddev)); | 4804 | printk(KERN_INFO "md: syncing RAID array %s\n", mdname(mddev)); |
4743 | printk(KERN_INFO "md: minimum _guaranteed_ reconstruction speed:" | 4805 | printk(KERN_INFO "md: minimum _guaranteed_ reconstruction speed:" |
@@ -4747,12 +4809,7 @@ void md_do_sync(mddev_t *mddev) | |||
4747 | speed_max(mddev)); | 4809 | speed_max(mddev)); |
4748 | 4810 | ||
4749 | is_mddev_idle(mddev); /* this also initializes IO event counters */ | 4811 | is_mddev_idle(mddev); /* this also initializes IO event counters */ |
4750 | /* we don't use the checkpoint if there's a bitmap */ | 4812 | |
4751 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && !mddev->bitmap | ||
4752 | && ! test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) | ||
4753 | j = mddev->recovery_cp; | ||
4754 | else | ||
4755 | j = 0; | ||
4756 | io_sectors = 0; | 4813 | io_sectors = 0; |
4757 | for (m = 0; m < SYNC_MARKS; m++) { | 4814 | for (m = 0; m < SYNC_MARKS; m++) { |
4758 | mark[m] = jiffies; | 4815 | mark[m] = jiffies; |
@@ -4873,15 +4930,28 @@ void md_do_sync(mddev_t *mddev) | |||
4873 | if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) && | 4930 | if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) && |
4874 | test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && | 4931 | test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && |
4875 | !test_bit(MD_RECOVERY_CHECK, &mddev->recovery) && | 4932 | !test_bit(MD_RECOVERY_CHECK, &mddev->recovery) && |
4876 | mddev->curr_resync > 2 && | 4933 | mddev->curr_resync > 2) { |
4877 | mddev->curr_resync >= mddev->recovery_cp) { | 4934 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { |
4878 | if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { | 4935 | if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { |
4879 | printk(KERN_INFO | 4936 | if (mddev->curr_resync >= mddev->recovery_cp) { |
4880 | "md: checkpointing recovery of %s.\n", | 4937 | printk(KERN_INFO |
4881 | mdname(mddev)); | 4938 | "md: checkpointing recovery of %s.\n", |
4882 | mddev->recovery_cp = mddev->curr_resync; | 4939 | mdname(mddev)); |
4883 | } else | 4940 | mddev->recovery_cp = mddev->curr_resync; |
4884 | mddev->recovery_cp = MaxSector; | 4941 | } |
4942 | } else | ||
4943 | mddev->recovery_cp = MaxSector; | ||
4944 | } else { | ||
4945 | if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) | ||
4946 | mddev->curr_resync = MaxSector; | ||
4947 | ITERATE_RDEV(mddev,rdev,rtmp) | ||
4948 | if (rdev->raid_disk >= 0 && | ||
4949 | !test_bit(Faulty, &rdev->flags) && | ||
4950 | !test_bit(In_sync, &rdev->flags) && | ||
4951 | rdev->recovery_offset < mddev->curr_resync) | ||
4952 | rdev->recovery_offset = mddev->curr_resync; | ||
4953 | mddev->sb_dirty = 1; | ||
4954 | } | ||
4885 | } | 4955 | } |
4886 | 4956 | ||
4887 | skip: | 4957 | skip: |
@@ -5002,6 +5072,8 @@ void md_check_recovery(mddev_t *mddev) | |||
5002 | clear_bit(MD_RECOVERY_INTR, &mddev->recovery); | 5072 | clear_bit(MD_RECOVERY_INTR, &mddev->recovery); |
5003 | clear_bit(MD_RECOVERY_DONE, &mddev->recovery); | 5073 | clear_bit(MD_RECOVERY_DONE, &mddev->recovery); |
5004 | 5074 | ||
5075 | if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) | ||
5076 | goto unlock; | ||
5005 | /* no recovery is running. | 5077 | /* no recovery is running. |
5006 | * remove any failed drives, then | 5078 | * remove any failed drives, then |
5007 | * add spares if possible. | 5079 | * add spares if possible. |
@@ -5024,6 +5096,7 @@ void md_check_recovery(mddev_t *mddev) | |||
5024 | ITERATE_RDEV(mddev,rdev,rtmp) | 5096 | ITERATE_RDEV(mddev,rdev,rtmp) |
5025 | if (rdev->raid_disk < 0 | 5097 | if (rdev->raid_disk < 0 |
5026 | && !test_bit(Faulty, &rdev->flags)) { | 5098 | && !test_bit(Faulty, &rdev->flags)) { |
5099 | rdev->recovery_offset = 0; | ||
5027 | if (mddev->pers->hot_add_disk(mddev,rdev)) { | 5100 | if (mddev->pers->hot_add_disk(mddev,rdev)) { |
5028 | char nm[20]; | 5101 | char nm[20]; |
5029 | sprintf(nm, "rd%d", rdev->raid_disk); | 5102 | sprintf(nm, "rd%d", rdev->raid_disk); |