author     Goldwyn Rodrigues <rgoldwyn@suse.com>    2015-08-18 18:14:42 -0400
committer  Goldwyn Rodrigues <rgoldwyn@suse.com>    2015-10-12 02:32:05 -0400
commit     c40f341f1e7fd4eddcfc5881d94cfa8669071ee6
tree       d8d572cb6b88dcd1102596d31b2bd153f79fdaab
parent     3c462c880b52aae2cfbbb8db8b401eef118cc128
md-cluster: Use a small window for resync
Suspending the entire device for resync could take too long. Resync
in small chunks instead.

The cluster's resync window (32M) is maintained in r1conf as
cluster_sync_low and cluster_sync_high and is processed in
raid1's sync_request(). If the current resync is outside the cluster
resync window:

1. Set cluster_sync_low to curr_resync_completed.
2. Check whether the sync fits in the new window; if not, issue a
   wait_barrier() and set cluster_sync_low to sector_nr.
3. Set cluster_sync_high to cluster_sync_low + resync_window.
4. Send a message to all nodes so they may add it to their suspension
   list.

A condensed sketch of this window advance is included after the
sign-off tags below.

bitmap_cond_end_sync is modified to allow forcing a sync in order
to bring curr_resync_completed up to date with the sector passed in.
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Signed-off-by: NeilBrown <neilb@suse.de>
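For reference, a condensed sketch of the window advance performed in
raid1's sync_request() once an r1_bio has been filled; it restates the
raid1.c hunk below (the wait_barrier() path from step 2 is not part of
that hunk and is not shown):

    if (mddev_is_clustered(mddev) &&
        conf->cluster_sync_high < sector_nr + nr_sectors) {
            /* Steps 1 and 3: slide the window up, starting from the
             * last completed sector. */
            conf->cluster_sync_low = mddev->curr_resync_completed;
            conf->cluster_sync_high = conf->cluster_sync_low +
                                      CLUSTER_RESYNC_WINDOW_SECTORS;
            /* Step 4: tell the other nodes so they can suspend I/O
             * to this range. */
            md_cluster_ops->resync_info_update(mddev,
                                               conf->cluster_sync_low,
                                               conf->cluster_sync_high);
    }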
 -rw-r--r--  drivers/md/bitmap.c     |  4
 -rw-r--r--  drivers/md/bitmap.h     |  2
 -rw-r--r--  drivers/md/md-cluster.c | 41
 -rw-r--r--  drivers/md/md-cluster.h |  4
 -rw-r--r--  drivers/md/md.c         |  8
 -rw-r--r--  drivers/md/raid1.c      | 26
 -rw-r--r--  drivers/md/raid1.h      |  7
 -rw-r--r--  drivers/md/raid10.c     |  2
 -rw-r--r--  drivers/md/raid5.c      |  2
 9 files changed, 43 insertions(+), 53 deletions(-)
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index e9d3ee703e6d..4f22e919787a 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -1570,7 +1570,7 @@ void bitmap_close_sync(struct bitmap *bitmap)
 }
 EXPORT_SYMBOL(bitmap_close_sync);
 
-void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector)
+void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force)
 {
         sector_t s = 0;
         sector_t blocks;
@@ -1581,7 +1581,7 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector)
                 bitmap->last_end_sync = jiffies;
                 return;
         }
-        if (time_before(jiffies, (bitmap->last_end_sync
+        if (!force && time_before(jiffies, (bitmap->last_end_sync
                               + bitmap->mddev->bitmap_info.daemon_sleep)))
                 return;
         wait_event(bitmap->mddev->recovery_wait,
diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h
index 8731fa06855f..7d5c3a610ca5 100644
--- a/drivers/md/bitmap.h
+++ b/drivers/md/bitmap.h
@@ -257,7 +257,7 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset,
 int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int degraded);
 void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted);
 void bitmap_close_sync(struct bitmap *bitmap);
-void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector);
+void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force);
 
 void bitmap_unplug(struct bitmap *bitmap);
 void bitmap_daemon_work(struct mddev *mddev);
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 4a965f22be20..b94a2e68ef43 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -802,15 +802,6 @@ static int slot_number(struct mddev *mddev)
         return cinfo->slot_number - 1;
 }
 
-static void resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi)
-{
-        struct md_cluster_info *cinfo = mddev->cluster_info;
-
-        add_resync_info(mddev, cinfo->bitmap_lockres, lo, hi);
-        /* Re-acquire the lock to refresh LVB */
-        dlm_lock_sync(cinfo->bitmap_lockres, DLM_LOCK_PW);
-}
-
 static int metadata_update_start(struct mddev *mddev)
 {
         return lock_comm(mddev->cluster_info);
@@ -836,45 +827,25 @@ static int metadata_update_cancel(struct mddev *mddev)
         return dlm_unlock_sync(cinfo->token_lockres);
 }
 
-static int resync_send(struct mddev *mddev, enum msg_type type,
-                sector_t lo, sector_t hi)
+static int resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi)
 {
         struct md_cluster_info *cinfo = mddev->cluster_info;
         struct cluster_msg cmsg;
         int slot = cinfo->slot_number - 1;
 
+        add_resync_info(mddev, cinfo->bitmap_lockres, lo, hi);
+        /* Re-acquire the lock to refresh LVB */
+        dlm_lock_sync(cinfo->bitmap_lockres, DLM_LOCK_PW);
         pr_info("%s:%d lo: %llu hi: %llu\n", __func__, __LINE__,
                         (unsigned long long)lo,
                         (unsigned long long)hi);
-        resync_info_update(mddev, lo, hi);
-        cmsg.type = cpu_to_le32(type);
+        cmsg.type = cpu_to_le32(RESYNCING);
         cmsg.slot = cpu_to_le32(slot);
         cmsg.low = cpu_to_le64(lo);
         cmsg.high = cpu_to_le64(hi);
         return sendmsg(cinfo, &cmsg);
 }
 
-static int resync_start(struct mddev *mddev, sector_t lo, sector_t hi)
-{
-        pr_info("%s:%d\n", __func__, __LINE__);
-        return resync_send(mddev, RESYNCING, lo, hi);
-}
-
-static void resync_finish(struct mddev *mddev)
-{
-        struct md_cluster_info *cinfo = mddev->cluster_info;
-        struct cluster_msg cmsg;
-        int slot = cinfo->slot_number - 1;
-
-        pr_info("%s:%d\n", __func__, __LINE__);
-        resync_send(mddev, RESYNCING, 0, 0);
-        if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
-                cmsg.type = cpu_to_le32(BITMAP_NEEDS_SYNC);
-                cmsg.slot = cpu_to_le32(slot);
-                sendmsg(cinfo, &cmsg);
-        }
-}
-
 static int area_resyncing(struct mddev *mddev, int direction,
                 sector_t lo, sector_t hi)
 {
@@ -997,8 +968,6 @@ static struct md_cluster_operations cluster_ops = {
         .leave = leave,
         .slot_number = slot_number,
         .resync_info_update = resync_info_update,
-        .resync_start = resync_start,
-        .resync_finish = resync_finish,
         .metadata_update_start = metadata_update_start,
         .metadata_update_finish = metadata_update_finish,
         .metadata_update_cancel = metadata_update_cancel,
diff --git a/drivers/md/md-cluster.h b/drivers/md/md-cluster.h
index 00defe2badbc..f5bdc0c86eaa 100644
--- a/drivers/md/md-cluster.h
+++ b/drivers/md/md-cluster.h
@@ -12,9 +12,7 @@ struct md_cluster_operations {
         int (*join)(struct mddev *mddev, int nodes);
         int (*leave)(struct mddev *mddev);
         int (*slot_number)(struct mddev *mddev);
-        void (*resync_info_update)(struct mddev *mddev, sector_t lo, sector_t hi);
-        int (*resync_start)(struct mddev *mddev, sector_t lo, sector_t hi);
-        void (*resync_finish)(struct mddev *mddev);
+        int (*resync_info_update)(struct mddev *mddev, sector_t lo, sector_t hi);
         int (*metadata_update_start)(struct mddev *mddev);
         int (*metadata_update_finish)(struct mddev *mddev);
         int (*metadata_update_cancel)(struct mddev *mddev);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 1e1bdd86f40c..9798a9921a38 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -7805,9 +7805,6 @@ void md_do_sync(struct md_thread *thread)
         md_new_event(mddev);
         update_time = jiffies;
 
-        if (mddev_is_clustered(mddev))
-                md_cluster_ops->resync_start(mddev, j, max_sectors);
-
         blk_start_plug(&plug);
         while (j < max_sectors) {
                 sector_t sectors;
@@ -7871,8 +7868,6 @@ void md_do_sync(struct md_thread *thread)
                         j = max_sectors;
                 if (j > 2)
                         mddev->curr_resync = j;
-                if (mddev_is_clustered(mddev))
-                        md_cluster_ops->resync_info_update(mddev, j, max_sectors);
                 mddev->curr_mark_cnt = io_sectors;
                 if (last_check == 0)
                         /* this is the earliest that rebuild will be
@@ -7979,9 +7974,6 @@ void md_do_sync(struct md_thread *thread)
                 }
         }
  skip:
-        if (mddev_is_clustered(mddev))
-                md_cluster_ops->resync_finish(mddev);
-
         set_bit(MD_CHANGE_DEVS, &mddev->flags);
 
         spin_lock(&mddev->lock);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 049df6c4a8cc..1dd13bb52940 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -90,6 +90,8 @@ static void r1bio_pool_free(void *r1_bio, void *data)
 #define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE)
 #define RESYNC_WINDOW (RESYNC_BLOCK_SIZE * RESYNC_DEPTH)
 #define RESYNC_WINDOW_SECTORS (RESYNC_WINDOW >> 9)
+#define CLUSTER_RESYNC_WINDOW (16 * RESYNC_WINDOW)
+#define CLUSTER_RESYNC_WINDOW_SECTORS (CLUSTER_RESYNC_WINDOW >> 9)
 #define NEXT_NORMALIO_DISTANCE (3 * RESYNC_WINDOW_SECTORS)
 
 static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
@@ -2488,6 +2490,13 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
 
                 bitmap_close_sync(mddev->bitmap);
                 close_sync(conf);
+
+                if (mddev_is_clustered(mddev)) {
+                        conf->cluster_sync_low = 0;
+                        conf->cluster_sync_high = 0;
+                        /* Send zeros to mark end of resync */
+                        md_cluster_ops->resync_info_update(mddev, 0, 0);
+                }
                 return 0;
         }
 
@@ -2508,7 +2517,12 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
                 return sync_blocks;
         }
 
-        bitmap_cond_end_sync(mddev->bitmap, sector_nr);
+        /* we are incrementing sector_nr below. To be safe, we check against
+         * sector_nr + two times RESYNC_SECTORS
+         */
+
+        bitmap_cond_end_sync(mddev->bitmap, sector_nr,
+                mddev_is_clustered(mddev) && (sector_nr + 2 * RESYNC_SECTORS > conf->cluster_sync_high));
         r1_bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO);
 
         raise_barrier(conf, sector_nr);
@@ -2699,6 +2713,16 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
  bio_full:
         r1_bio->sectors = nr_sectors;
 
+        if (mddev_is_clustered(mddev) &&
+                        conf->cluster_sync_high < sector_nr + nr_sectors) {
+                conf->cluster_sync_low = mddev->curr_resync_completed;
+                conf->cluster_sync_high = conf->cluster_sync_low + CLUSTER_RESYNC_WINDOW_SECTORS;
+                /* Send resync message */
+                md_cluster_ops->resync_info_update(mddev,
+                                conf->cluster_sync_low,
+                                conf->cluster_sync_high);
+        }
+
         /* For a user-requested sync, we read all readable devices and do a
          * compare
          */
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h
index c52d7139c5d7..61c39b390cd8 100644
--- a/drivers/md/raid1.h
+++ b/drivers/md/raid1.h
@@ -111,6 +111,13 @@ struct r1conf {
          * the new thread here until we fully activate the array.
          */
         struct md_thread        *thread;
+
+        /* Keep track of cluster resync window to send to other
+         * nodes.
+         */
+        sector_t                cluster_sync_low;
+        sector_t                cluster_sync_high;
+
 };
 
 /*
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 7c99a4037715..5f30b7526c1f 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -3137,7 +3137,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
                 /* resync. Schedule a read for every block at this virt offset */
                 int count = 0;
 
-                bitmap_cond_end_sync(mddev->bitmap, sector_nr);
+                bitmap_cond_end_sync(mddev->bitmap, sector_nr, 0);
 
                 if (!bitmap_start_sync(mddev->bitmap, sector_nr,
                                        &sync_blocks, mddev->degraded) &&
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 49bb8d3ff9be..5b79770c4f08 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -5613,7 +5613,7 @@ static inline sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int
                 return sync_blocks * STRIPE_SECTORS; /* keep things rounded to whole stripes */
         }
 
-        bitmap_cond_end_sync(mddev->bitmap, sector_nr);
+        bitmap_cond_end_sync(mddev->bitmap, sector_nr, false);
 
         sh = get_active_stripe(conf, sector_nr, 0, 1, 0);
         if (sh == NULL) {