about summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorGoldwyn Rodrigues <rgoldwyn@suse.com>2015-08-18 18:14:42 -0400
committerGoldwyn Rodrigues <rgoldwyn@suse.com>2015-10-12 02:32:05 -0400
commitc40f341f1e7fd4eddcfc5881d94cfa8669071ee6 (patch)
treed8d572cb6b88dcd1102596d31b2bd153f79fdaab
parent3c462c880b52aae2cfbbb8db8b401eef118cc128 (diff)
md-cluster: Use a small window for resync
Suspending the entire device for resync could take too long. Resync in small chunks. The cluster's resync window (32M) is maintained in r1conf as cluster_sync_low and cluster_sync_high and processed in raid1's sync_request(). If the current resync is outside the cluster resync window: 1. Set cluster_sync_low to curr_resync_completed. 2. Check if the sync will fit in the new window; if not, issue a wait_barrier() and set cluster_sync_low to sector_nr. 3. Set cluster_sync_high to cluster_sync_low + resync_window. 4. Send a message to all nodes so they may add it to their suspension list. bitmap_cond_end_sync is modified to allow forcing a sync in order to get curr_resync_completed up to date with the sector passed. Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com> Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r--drivers/md/bitmap.c4
-rw-r--r--drivers/md/bitmap.h2
-rw-r--r--drivers/md/md-cluster.c41
-rw-r--r--drivers/md/md-cluster.h4
-rw-r--r--drivers/md/md.c8
-rw-r--r--drivers/md/raid1.c26
-rw-r--r--drivers/md/raid1.h7
-rw-r--r--drivers/md/raid10.c2
-rw-r--r--drivers/md/raid5.c2
9 files changed, 43 insertions, 53 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index e9d3ee703e6d..4f22e919787a 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -1570,7 +1570,7 @@ void bitmap_close_sync(struct bitmap *bitmap)
1570} 1570}
1571EXPORT_SYMBOL(bitmap_close_sync); 1571EXPORT_SYMBOL(bitmap_close_sync);
1572 1572
1573void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector) 1573void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force)
1574{ 1574{
1575 sector_t s = 0; 1575 sector_t s = 0;
1576 sector_t blocks; 1576 sector_t blocks;
@@ -1581,7 +1581,7 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector)
1581 bitmap->last_end_sync = jiffies; 1581 bitmap->last_end_sync = jiffies;
1582 return; 1582 return;
1583 } 1583 }
1584 if (time_before(jiffies, (bitmap->last_end_sync 1584 if (!force && time_before(jiffies, (bitmap->last_end_sync
1585 + bitmap->mddev->bitmap_info.daemon_sleep))) 1585 + bitmap->mddev->bitmap_info.daemon_sleep)))
1586 return; 1586 return;
1587 wait_event(bitmap->mddev->recovery_wait, 1587 wait_event(bitmap->mddev->recovery_wait,
diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h
index 8731fa06855f..7d5c3a610ca5 100644
--- a/drivers/md/bitmap.h
+++ b/drivers/md/bitmap.h
@@ -257,7 +257,7 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset,
257int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int degraded); 257int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int degraded);
258void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted); 258void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted);
259void bitmap_close_sync(struct bitmap *bitmap); 259void bitmap_close_sync(struct bitmap *bitmap);
260void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector); 260void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force);
261 261
262void bitmap_unplug(struct bitmap *bitmap); 262void bitmap_unplug(struct bitmap *bitmap);
263void bitmap_daemon_work(struct mddev *mddev); 263void bitmap_daemon_work(struct mddev *mddev);
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 4a965f22be20..b94a2e68ef43 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -802,15 +802,6 @@ static int slot_number(struct mddev *mddev)
802 return cinfo->slot_number - 1; 802 return cinfo->slot_number - 1;
803} 803}
804 804
805static void resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi)
806{
807 struct md_cluster_info *cinfo = mddev->cluster_info;
808
809 add_resync_info(mddev, cinfo->bitmap_lockres, lo, hi);
810 /* Re-acquire the lock to refresh LVB */
811 dlm_lock_sync(cinfo->bitmap_lockres, DLM_LOCK_PW);
812}
813
814static int metadata_update_start(struct mddev *mddev) 805static int metadata_update_start(struct mddev *mddev)
815{ 806{
816 return lock_comm(mddev->cluster_info); 807 return lock_comm(mddev->cluster_info);
@@ -836,45 +827,25 @@ static int metadata_update_cancel(struct mddev *mddev)
836 return dlm_unlock_sync(cinfo->token_lockres); 827 return dlm_unlock_sync(cinfo->token_lockres);
837} 828}
838 829
839static int resync_send(struct mddev *mddev, enum msg_type type, 830static int resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi)
840 sector_t lo, sector_t hi)
841{ 831{
842 struct md_cluster_info *cinfo = mddev->cluster_info; 832 struct md_cluster_info *cinfo = mddev->cluster_info;
843 struct cluster_msg cmsg; 833 struct cluster_msg cmsg;
844 int slot = cinfo->slot_number - 1; 834 int slot = cinfo->slot_number - 1;
845 835
836 add_resync_info(mddev, cinfo->bitmap_lockres, lo, hi);
837 /* Re-acquire the lock to refresh LVB */
838 dlm_lock_sync(cinfo->bitmap_lockres, DLM_LOCK_PW);
846 pr_info("%s:%d lo: %llu hi: %llu\n", __func__, __LINE__, 839 pr_info("%s:%d lo: %llu hi: %llu\n", __func__, __LINE__,
847 (unsigned long long)lo, 840 (unsigned long long)lo,
848 (unsigned long long)hi); 841 (unsigned long long)hi);
849 resync_info_update(mddev, lo, hi); 842 cmsg.type = cpu_to_le32(RESYNCING);
850 cmsg.type = cpu_to_le32(type);
851 cmsg.slot = cpu_to_le32(slot); 843 cmsg.slot = cpu_to_le32(slot);
852 cmsg.low = cpu_to_le64(lo); 844 cmsg.low = cpu_to_le64(lo);
853 cmsg.high = cpu_to_le64(hi); 845 cmsg.high = cpu_to_le64(hi);
854 return sendmsg(cinfo, &cmsg); 846 return sendmsg(cinfo, &cmsg);
855} 847}
856 848
857static int resync_start(struct mddev *mddev, sector_t lo, sector_t hi)
858{
859 pr_info("%s:%d\n", __func__, __LINE__);
860 return resync_send(mddev, RESYNCING, lo, hi);
861}
862
863static void resync_finish(struct mddev *mddev)
864{
865 struct md_cluster_info *cinfo = mddev->cluster_info;
866 struct cluster_msg cmsg;
867 int slot = cinfo->slot_number - 1;
868
869 pr_info("%s:%d\n", __func__, __LINE__);
870 resync_send(mddev, RESYNCING, 0, 0);
871 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
872 cmsg.type = cpu_to_le32(BITMAP_NEEDS_SYNC);
873 cmsg.slot = cpu_to_le32(slot);
874 sendmsg(cinfo, &cmsg);
875 }
876}
877
878static int area_resyncing(struct mddev *mddev, int direction, 849static int area_resyncing(struct mddev *mddev, int direction,
879 sector_t lo, sector_t hi) 850 sector_t lo, sector_t hi)
880{ 851{
@@ -997,8 +968,6 @@ static struct md_cluster_operations cluster_ops = {
997 .leave = leave, 968 .leave = leave,
998 .slot_number = slot_number, 969 .slot_number = slot_number,
999 .resync_info_update = resync_info_update, 970 .resync_info_update = resync_info_update,
1000 .resync_start = resync_start,
1001 .resync_finish = resync_finish,
1002 .metadata_update_start = metadata_update_start, 971 .metadata_update_start = metadata_update_start,
1003 .metadata_update_finish = metadata_update_finish, 972 .metadata_update_finish = metadata_update_finish,
1004 .metadata_update_cancel = metadata_update_cancel, 973 .metadata_update_cancel = metadata_update_cancel,
diff --git a/drivers/md/md-cluster.h b/drivers/md/md-cluster.h
index 00defe2badbc..f5bdc0c86eaa 100644
--- a/drivers/md/md-cluster.h
+++ b/drivers/md/md-cluster.h
@@ -12,9 +12,7 @@ struct md_cluster_operations {
12 int (*join)(struct mddev *mddev, int nodes); 12 int (*join)(struct mddev *mddev, int nodes);
13 int (*leave)(struct mddev *mddev); 13 int (*leave)(struct mddev *mddev);
14 int (*slot_number)(struct mddev *mddev); 14 int (*slot_number)(struct mddev *mddev);
15 void (*resync_info_update)(struct mddev *mddev, sector_t lo, sector_t hi); 15 int (*resync_info_update)(struct mddev *mddev, sector_t lo, sector_t hi);
16 int (*resync_start)(struct mddev *mddev, sector_t lo, sector_t hi);
17 void (*resync_finish)(struct mddev *mddev);
18 int (*metadata_update_start)(struct mddev *mddev); 16 int (*metadata_update_start)(struct mddev *mddev);
19 int (*metadata_update_finish)(struct mddev *mddev); 17 int (*metadata_update_finish)(struct mddev *mddev);
20 int (*metadata_update_cancel)(struct mddev *mddev); 18 int (*metadata_update_cancel)(struct mddev *mddev);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 1e1bdd86f40c..9798a9921a38 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -7805,9 +7805,6 @@ void md_do_sync(struct md_thread *thread)
7805 md_new_event(mddev); 7805 md_new_event(mddev);
7806 update_time = jiffies; 7806 update_time = jiffies;
7807 7807
7808 if (mddev_is_clustered(mddev))
7809 md_cluster_ops->resync_start(mddev, j, max_sectors);
7810
7811 blk_start_plug(&plug); 7808 blk_start_plug(&plug);
7812 while (j < max_sectors) { 7809 while (j < max_sectors) {
7813 sector_t sectors; 7810 sector_t sectors;
@@ -7871,8 +7868,6 @@ void md_do_sync(struct md_thread *thread)
7871 j = max_sectors; 7868 j = max_sectors;
7872 if (j > 2) 7869 if (j > 2)
7873 mddev->curr_resync = j; 7870 mddev->curr_resync = j;
7874 if (mddev_is_clustered(mddev))
7875 md_cluster_ops->resync_info_update(mddev, j, max_sectors);
7876 mddev->curr_mark_cnt = io_sectors; 7871 mddev->curr_mark_cnt = io_sectors;
7877 if (last_check == 0) 7872 if (last_check == 0)
7878 /* this is the earliest that rebuild will be 7873 /* this is the earliest that rebuild will be
@@ -7979,9 +7974,6 @@ void md_do_sync(struct md_thread *thread)
7979 } 7974 }
7980 } 7975 }
7981 skip: 7976 skip:
7982 if (mddev_is_clustered(mddev))
7983 md_cluster_ops->resync_finish(mddev);
7984
7985 set_bit(MD_CHANGE_DEVS, &mddev->flags); 7977 set_bit(MD_CHANGE_DEVS, &mddev->flags);
7986 7978
7987 spin_lock(&mddev->lock); 7979 spin_lock(&mddev->lock);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 049df6c4a8cc..1dd13bb52940 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -90,6 +90,8 @@ static void r1bio_pool_free(void *r1_bio, void *data)
90#define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE) 90#define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE)
91#define RESYNC_WINDOW (RESYNC_BLOCK_SIZE * RESYNC_DEPTH) 91#define RESYNC_WINDOW (RESYNC_BLOCK_SIZE * RESYNC_DEPTH)
92#define RESYNC_WINDOW_SECTORS (RESYNC_WINDOW >> 9) 92#define RESYNC_WINDOW_SECTORS (RESYNC_WINDOW >> 9)
93#define CLUSTER_RESYNC_WINDOW (16 * RESYNC_WINDOW)
94#define CLUSTER_RESYNC_WINDOW_SECTORS (CLUSTER_RESYNC_WINDOW >> 9)
93#define NEXT_NORMALIO_DISTANCE (3 * RESYNC_WINDOW_SECTORS) 95#define NEXT_NORMALIO_DISTANCE (3 * RESYNC_WINDOW_SECTORS)
94 96
95static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data) 97static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
@@ -2488,6 +2490,13 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
2488 2490
2489 bitmap_close_sync(mddev->bitmap); 2491 bitmap_close_sync(mddev->bitmap);
2490 close_sync(conf); 2492 close_sync(conf);
2493
2494 if (mddev_is_clustered(mddev)) {
2495 conf->cluster_sync_low = 0;
2496 conf->cluster_sync_high = 0;
2497 /* Send zeros to mark end of resync */
2498 md_cluster_ops->resync_info_update(mddev, 0, 0);
2499 }
2491 return 0; 2500 return 0;
2492 } 2501 }
2493 2502
@@ -2508,7 +2517,12 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
2508 return sync_blocks; 2517 return sync_blocks;
2509 } 2518 }
2510 2519
2511 bitmap_cond_end_sync(mddev->bitmap, sector_nr); 2520 /* we are incrementing sector_nr below. To be safe, we check against
2521 * sector_nr + two times RESYNC_SECTORS
2522 */
2523
2524 bitmap_cond_end_sync(mddev->bitmap, sector_nr,
2525 mddev_is_clustered(mddev) && (sector_nr + 2 * RESYNC_SECTORS > conf->cluster_sync_high));
2512 r1_bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO); 2526 r1_bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO);
2513 2527
2514 raise_barrier(conf, sector_nr); 2528 raise_barrier(conf, sector_nr);
@@ -2699,6 +2713,16 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
2699 bio_full: 2713 bio_full:
2700 r1_bio->sectors = nr_sectors; 2714 r1_bio->sectors = nr_sectors;
2701 2715
2716 if (mddev_is_clustered(mddev) &&
2717 conf->cluster_sync_high < sector_nr + nr_sectors) {
2718 conf->cluster_sync_low = mddev->curr_resync_completed;
2719 conf->cluster_sync_high = conf->cluster_sync_low + CLUSTER_RESYNC_WINDOW_SECTORS;
2720 /* Send resync message */
2721 md_cluster_ops->resync_info_update(mddev,
2722 conf->cluster_sync_low,
2723 conf->cluster_sync_high);
2724 }
2725
2702 /* For a user-requested sync, we read all readable devices and do a 2726 /* For a user-requested sync, we read all readable devices and do a
2703 * compare 2727 * compare
2704 */ 2728 */
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h
index c52d7139c5d7..61c39b390cd8 100644
--- a/drivers/md/raid1.h
+++ b/drivers/md/raid1.h
@@ -111,6 +111,13 @@ struct r1conf {
111 * the new thread here until we fully activate the array. 111 * the new thread here until we fully activate the array.
112 */ 112 */
113 struct md_thread *thread; 113 struct md_thread *thread;
114
115 /* Keep track of cluster resync window to send to other
116 * nodes.
117 */
118 sector_t cluster_sync_low;
119 sector_t cluster_sync_high;
120
114}; 121};
115 122
116/* 123/*
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 7c99a4037715..5f30b7526c1f 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -3137,7 +3137,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
3137 /* resync. Schedule a read for every block at this virt offset */ 3137 /* resync. Schedule a read for every block at this virt offset */
3138 int count = 0; 3138 int count = 0;
3139 3139
3140 bitmap_cond_end_sync(mddev->bitmap, sector_nr); 3140 bitmap_cond_end_sync(mddev->bitmap, sector_nr, 0);
3141 3141
3142 if (!bitmap_start_sync(mddev->bitmap, sector_nr, 3142 if (!bitmap_start_sync(mddev->bitmap, sector_nr,
3143 &sync_blocks, mddev->degraded) && 3143 &sync_blocks, mddev->degraded) &&
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 49bb8d3ff9be..5b79770c4f08 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -5613,7 +5613,7 @@ static inline sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int
5613 return sync_blocks * STRIPE_SECTORS; /* keep things rounded to whole stripes */ 5613 return sync_blocks * STRIPE_SECTORS; /* keep things rounded to whole stripes */
5614 } 5614 }
5615 5615
5616 bitmap_cond_end_sync(mddev->bitmap, sector_nr); 5616 bitmap_cond_end_sync(mddev->bitmap, sector_nr, false);
5617 5617
5618 sh = get_active_stripe(conf, sector_nr, 0, 1, 0); 5618 sh = get_active_stripe(conf, sector_nr, 0, 1, 0);
5619 if (sh == NULL) { 5619 if (sh == NULL) {