author    NeilBrown <neilb@suse.de>	2006-06-26 03:27:40 -0400
committer Linus Torvalds <torvalds@g5.osdl.org>	2006-06-26 12:58:37 -0400
commit    5fd6c1dce06ec24ef3de20fe0c7ecf2ba9fe5ef9 (patch)
tree      bd7cc8c22615e8622d3d63b7fc68dcc3ac1964de /drivers/md/md.c
parent    a8a55c387da28d67d98f56e4f5021462cb61f7a6 (diff)
[PATCH] md: allow checkpoint of recovery with version-1 superblock
For a while we have had checkpointing of resync.  The version-1 superblock
allows recovery to be checkpointed as well, and this patch implements that.

Due to early carelessness we need to add a feature flag to signal that the
recovery_offset field is in use, otherwise older kernels would assume that a
partially recovered array is in fact fully recovered.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'drivers/md/md.c')
 drivers/md/md.c | 115 ++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 94 insertions(+), 21 deletions(-)
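
The compatibility point in the commit message is worth spelling out.  Below is
a minimal sketch, in user-space C rather than kernel source, of why the new
recovery_offset field has to be gated by a feature_map bit.  The struct is an
excerpt of the v1 superblock layout, and the flag value is an assumption for
illustration, not copied from include/linux/raid/md_p.h:

#include <stdint.h>
#include <stdio.h>

#define MD_FEATURE_RECOVERY_OFFSET	2	/* assumed bit value for illustration */

/* Excerpt of the on-disk v1 superblock fields involved here. */
struct sb1_excerpt {
	uint32_t feature_map;		/* flags which optional fields are live */
	uint64_t recovery_offset;	/* sectors already rebuilt on this device */
};

/*
 * A reader only trusts recovery_offset when the flag says the field is
 * in use; otherwise it falls back to treating the device as fully in
 * sync.  This mirrors the "default:" branch the patch adds to
 * super_1_validate() in the first hunk below.  It also shows the hazard
 * the commit message describes: a kernel that predates the flag never
 * checks the field, so a half-rebuilt device looks fully recovered to it.
 */
static int treat_as_in_sync(const struct sb1_excerpt *sb)
{
	return !(sb->feature_map & MD_FEATURE_RECOVERY_OFFSET);
}

int main(void)
{
	struct sb1_excerpt sb = {
		.feature_map = MD_FEATURE_RECOVERY_OFFSET,
		.recovery_offset = 12345,
	};

	if (treat_as_in_sync(&sb))
		printf("device assumed fully recovered\n");
	else
		printf("resume rebuild at sector %llu\n",
		       (unsigned long long)sb.recovery_offset);
	return 0;
}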
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 71d46eb2c438..a296edd7e1c3 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1175,7 +1175,11 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 		set_bit(Faulty, &rdev->flags);
 		break;
 	default:
-		set_bit(In_sync, &rdev->flags);
+		if ((le32_to_cpu(sb->feature_map) &
+		     MD_FEATURE_RECOVERY_OFFSET))
+			rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
+		else
+			set_bit(In_sync, &rdev->flags);
 		rdev->raid_disk = role;
 		break;
 	}
@@ -1199,6 +1203,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 
 	sb->feature_map = 0;
 	sb->pad0 = 0;
+	sb->recovery_offset = cpu_to_le64(0);
 	memset(sb->pad1, 0, sizeof(sb->pad1));
 	memset(sb->pad2, 0, sizeof(sb->pad2));
 	memset(sb->pad3, 0, sizeof(sb->pad3));
@@ -1219,6 +1224,14 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 		sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset);
 		sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
 	}
+
+	if (rdev->raid_disk >= 0 &&
+	    !test_bit(In_sync, &rdev->flags) &&
+	    rdev->recovery_offset > 0) {
+		sb->feature_map |= cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
+		sb->recovery_offset = cpu_to_le64(rdev->recovery_offset);
+	}
+
 	if (mddev->reshape_position != MaxSector) {
 		sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
 		sb->reshape_position = cpu_to_le64(mddev->reshape_position);
@@ -1243,11 +1256,12 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 			sb->dev_roles[i] = cpu_to_le16(0xfffe);
 		else if (test_bit(In_sync, &rdev2->flags))
 			sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
+		else if (rdev2->raid_disk >= 0 && rdev2->recovery_offset > 0)
+			sb->dev_roles[i] = cpu_to_le16(rdev2->raid_disk);
 		else
 			sb->dev_roles[i] = cpu_to_le16(0xffff);
 	}
 
-	sb->recovery_offset = cpu_to_le64(0); /* not supported yet */
 	sb->sb_csum = calc_sb_1_csum(sb);
 }
 
@@ -2603,8 +2617,6 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data)
 	return NULL;
 }
 
-void md_wakeup_thread(mdk_thread_t *thread);
-
 static void md_safemode_timeout(unsigned long data)
 {
 	mddev_t *mddev = (mddev_t *) data;
@@ -2786,6 +2798,36 @@ static int do_md_run(mddev_t * mddev)
 	mddev->queue->queuedata = mddev;
 	mddev->queue->make_request_fn = mddev->pers->make_request;
 
+	/* If there is a partially-recovered drive we need to
+	 * start recovery here.  If we leave it to md_check_recovery,
+	 * it will remove the drives and not do the right thing
+	 */
+	if (mddev->degraded) {
+		struct list_head *rtmp;
+		int spares = 0;
+		ITERATE_RDEV(mddev,rdev,rtmp)
+			if (rdev->raid_disk >= 0 &&
+			    !test_bit(In_sync, &rdev->flags) &&
+			    !test_bit(Faulty, &rdev->flags))
+				/* complete an interrupted recovery */
+				spares++;
+		if (spares && mddev->pers->sync_request) {
+			mddev->recovery = 0;
+			set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
+			mddev->sync_thread = md_register_thread(md_do_sync,
+								mddev,
+								"%s_resync");
+			if (!mddev->sync_thread) {
+				printk(KERN_ERR "%s: could not start resync"
+				       " thread...\n",
+				       mdname(mddev));
+				/* leave the spares where they are, it shouldn't hurt */
+				mddev->recovery = 0;
+			} else
+				md_wakeup_thread(mddev->sync_thread);
+		}
+	}
+
 	mddev->changed = 1;
 	md_new_event(mddev);
 	return 0;
@@ -2819,6 +2861,7 @@ static int restart_array(mddev_t *mddev)
 		 */
 		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 		md_wakeup_thread(mddev->thread);
+		md_wakeup_thread(mddev->sync_thread);
 		err = 0;
 	} else {
 		printk(KERN_ERR "md: %s has no personality assigned.\n",
@@ -2842,6 +2885,7 @@ static int do_md_stop(mddev_t * mddev, int ro)
 	}
 
 	if (mddev->sync_thread) {
+		set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 		md_unregister_thread(mddev->sync_thread);
 		mddev->sync_thread = NULL;
@@ -2871,13 +2915,14 @@ static int do_md_stop(mddev_t * mddev, int ro)
 			if (mddev->ro)
 				mddev->ro = 0;
 		}
-		if (!mddev->in_sync) {
+		if (!mddev->in_sync || mddev->sb_dirty) {
 			/* mark array as shutdown cleanly */
 			mddev->in_sync = 1;
 			md_update_sb(mddev);
 		}
 		if (ro)
 			set_disk_ro(disk, 1);
+		clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 	}
 
 	/*
@@ -4665,10 +4710,14 @@ void md_do_sync(mddev_t *mddev)
 	struct list_head *tmp;
 	sector_t last_check;
 	int skipped = 0;
+	struct list_head *rtmp;
+	mdk_rdev_t *rdev;
 
 	/* just incase thread restarts... */
 	if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
 		return;
+	if (mddev->ro) /* never try to sync a read-only array */
+		return;
 
 	/* we overload curr_resync somewhat here.
 	 * 0 == not engaged in resync at all
@@ -4727,17 +4776,30 @@ void md_do_sync(mddev_t *mddev)
 		}
 	} while (mddev->curr_resync < 2);
 
+	j = 0;
 	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
 		/* resync follows the size requested by the personality,
 		 * which defaults to physical size, but can be virtual size
 		 */
 		max_sectors = mddev->resync_max_sectors;
 		mddev->resync_mismatches = 0;
+		/* we don't use the checkpoint if there's a bitmap */
+		if (!mddev->bitmap &&
+		    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
+			j = mddev->recovery_cp;
 	} else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
 		max_sectors = mddev->size << 1;
-	else
+	else {
 		/* recovery follows the physical size of devices */
 		max_sectors = mddev->size << 1;
+		j = MaxSector;
+		ITERATE_RDEV(mddev,rdev,rtmp)
+			if (rdev->raid_disk >= 0 &&
+			    !test_bit(Faulty, &rdev->flags) &&
+			    !test_bit(In_sync, &rdev->flags) &&
+			    rdev->recovery_offset < j)
+				j = rdev->recovery_offset;
+	}
 
 	printk(KERN_INFO "md: syncing RAID array %s\n", mdname(mddev));
 	printk(KERN_INFO "md: minimum _guaranteed_ reconstruction speed:"
@@ -4747,12 +4809,7 @@ void md_do_sync(mddev_t *mddev)
 	       speed_max(mddev));
 
 	is_mddev_idle(mddev); /* this also initializes IO event counters */
-	/* we don't use the checkpoint if there's a bitmap */
-	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && !mddev->bitmap
-	    && ! test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
-		j = mddev->recovery_cp;
-	else
-		j = 0;
+
 	io_sectors = 0;
 	for (m = 0; m < SYNC_MARKS; m++) {
 		mark[m] = jiffies;
@@ -4873,15 +4930,28 @@ void md_do_sync(mddev_t *mddev)
 	if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) &&
 	    test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
 	    !test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
-	    mddev->curr_resync > 2 &&
-	    mddev->curr_resync >= mddev->recovery_cp) {
-		if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
-			printk(KERN_INFO
-			       "md: checkpointing recovery of %s.\n",
-			       mdname(mddev));
-			mddev->recovery_cp = mddev->curr_resync;
-		} else
-			mddev->recovery_cp = MaxSector;
+	    mddev->curr_resync > 2) {
+		if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
+			if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
+				if (mddev->curr_resync >= mddev->recovery_cp) {
+					printk(KERN_INFO
+					       "md: checkpointing recovery of %s.\n",
+					       mdname(mddev));
+					mddev->recovery_cp = mddev->curr_resync;
+				}
+			} else
+				mddev->recovery_cp = MaxSector;
+		} else {
+			if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
+				mddev->curr_resync = MaxSector;
+			ITERATE_RDEV(mddev,rdev,rtmp)
+				if (rdev->raid_disk >= 0 &&
+				    !test_bit(Faulty, &rdev->flags) &&
+				    !test_bit(In_sync, &rdev->flags) &&
+				    rdev->recovery_offset < mddev->curr_resync)
+					rdev->recovery_offset = mddev->curr_resync;
+			mddev->sb_dirty = 1;
+		}
 	}
 
  skip:
@@ -5002,6 +5072,8 @@ void md_check_recovery(mddev_t *mddev)
 	clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
 	clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
 
+	if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
+		goto unlock;
 	/* no recovery is running.
 	 * remove any failed drives, then
 	 * add spares if possible.
@@ -5024,6 +5096,7 @@ void md_check_recovery(mddev_t *mddev)
 		ITERATE_RDEV(mddev,rdev,rtmp)
 			if (rdev->raid_disk < 0
 			    && !test_bit(Faulty, &rdev->flags)) {
+				rdev->recovery_offset = 0;
 				if (mddev->pers->hot_add_disk(mddev,rdev)) {
 					char nm[20];
 					sprintf(nm, "rd%d", rdev->raid_disk);