diff options
author | Guoqing Jiang <gqjiang@suse.com> | 2016-05-04 02:17:09 -0400 |
---|---|---|
committer | Shaohua Li <shli@fb.com> | 2016-05-09 12:24:03 -0400 |
commit | 51e453aecb267b6a99b1d2853bccd5bba7340236 (patch) | |
tree | c1a405b2f33b0798da25fa3e410d760c1364dc22 | |
parent | 85ad1d13ee9b3db00615ea24b031c15e5ba14fd1 (diff) |
md-cluster: gather resync infos and enable recv_thread after bitmap is ready
The in-memory bitmap is not ready when a node joins the cluster,
so it doesn't make sense to call gather_all_resync_info()
that early; we need to call it after the node's
bitmap is set up. Also, recv_thread could be woken up after
the node joins the cluster, but that could cause a problem if the node
receives a RESYNCING message without a personality, since
mddev->pers->quiesce is called in process_suspend_info.
This commit introduces a new cluster interface, load_bitmaps,
to fix the above problems. load_bitmaps is called in bitmap_load,
where the bitmap and personality are ready, and load_bitmaps
does the following tasks:
1. call gather_all_resync_info to load all the nodes'
bitmap info.
2. set the MD_CLUSTER_ALREADY_IN_CLUSTER bit so that recv_thread
can be woken up, and wake up recv_thread if there is
a pending recv event.
Then ack_bast only wakes up recv_thread after the IN_CLUSTER
bit is set; otherwise MD_CLUSTER_PENDING_RECV_EVENT is
set.
Reviewed-by: NeilBrown <neilb@suse.com>
Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
-rw-r--r-- | drivers/md/bitmap.c | 3 | ||||
-rw-r--r-- | drivers/md/md-cluster.c | 30 | ||||
-rw-r--r-- | drivers/md/md-cluster.h | 1 |
3 files changed, 28 insertions, 6 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index ad5a85847004..d8129ec93ebd 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c | |||
@@ -1848,6 +1848,9 @@ int bitmap_load(struct mddev *mddev) | |||
1848 | if (!bitmap) | 1848 | if (!bitmap) |
1849 | goto out; | 1849 | goto out; |
1850 | 1850 | ||
1851 | if (mddev_is_clustered(mddev)) | ||
1852 | md_cluster_ops->load_bitmaps(mddev, mddev->bitmap_info.nodes); | ||
1853 | |||
1851 | /* Clear out old bitmap info first: Either there is none, or we | 1854 | /* Clear out old bitmap info first: Either there is none, or we |
1852 | * are resuming after someone else has possibly changed things, | 1855 | * are resuming after someone else has possibly changed things, |
1853 | * so we should forget old cached info. | 1856 | * so we should forget old cached info. |
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index a55b5f4d0dbe..bef6a47b443f 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c | |||
@@ -61,6 +61,10 @@ struct resync_info { | |||
61 | * the lock. | 61 | * the lock. |
62 | */ | 62 | */ |
63 | #define MD_CLUSTER_SEND_LOCKED_ALREADY 5 | 63 | #define MD_CLUSTER_SEND_LOCKED_ALREADY 5 |
64 | /* We should receive message after node joined cluster and | ||
65 | * set up all the related infos such as bitmap and personality */ | ||
66 | #define MD_CLUSTER_ALREADY_IN_CLUSTER 6 | ||
67 | #define MD_CLUSTER_PENDING_RECV_EVENT 7 | ||
64 | 68 | ||
65 | 69 | ||
66 | struct md_cluster_info { | 70 | struct md_cluster_info { |
@@ -376,8 +380,12 @@ static void ack_bast(void *arg, int mode) | |||
376 | struct dlm_lock_resource *res = arg; | 380 | struct dlm_lock_resource *res = arg; |
377 | struct md_cluster_info *cinfo = res->mddev->cluster_info; | 381 | struct md_cluster_info *cinfo = res->mddev->cluster_info; |
378 | 382 | ||
379 | if (mode == DLM_LOCK_EX) | 383 | if (mode == DLM_LOCK_EX) { |
380 | md_wakeup_thread(cinfo->recv_thread); | 384 | if (test_bit(MD_CLUSTER_ALREADY_IN_CLUSTER, &cinfo->state)) |
385 | md_wakeup_thread(cinfo->recv_thread); | ||
386 | else | ||
387 | set_bit(MD_CLUSTER_PENDING_RECV_EVENT, &cinfo->state); | ||
388 | } | ||
381 | } | 389 | } |
382 | 390 | ||
383 | static void __remove_suspend_info(struct md_cluster_info *cinfo, int slot) | 391 | static void __remove_suspend_info(struct md_cluster_info *cinfo, int slot) |
@@ -846,10 +854,6 @@ static int join(struct mddev *mddev, int nodes) | |||
846 | if (!cinfo->resync_lockres) | 854 | if (!cinfo->resync_lockres) |
847 | goto err; | 855 | goto err; |
848 | 856 | ||
849 | ret = gather_all_resync_info(mddev, nodes); | ||
850 | if (ret) | ||
851 | goto err; | ||
852 | |||
853 | return 0; | 857 | return 0; |
854 | err: | 858 | err: |
855 | md_unregister_thread(&cinfo->recovery_thread); | 859 | md_unregister_thread(&cinfo->recovery_thread); |
@@ -867,6 +871,19 @@ err: | |||
867 | return ret; | 871 | return ret; |
868 | } | 872 | } |
869 | 873 | ||
874 | static void load_bitmaps(struct mddev *mddev, int total_slots) | ||
875 | { | ||
876 | struct md_cluster_info *cinfo = mddev->cluster_info; | ||
877 | |||
878 | /* load all the node's bitmap info for resync */ | ||
879 | if (gather_all_resync_info(mddev, total_slots)) | ||
880 | pr_err("md-cluster: failed to gather all resyn infos\n"); | ||
881 | set_bit(MD_CLUSTER_ALREADY_IN_CLUSTER, &cinfo->state); | ||
882 | /* wake up recv thread in case something need to be handled */ | ||
883 | if (test_and_clear_bit(MD_CLUSTER_PENDING_RECV_EVENT, &cinfo->state)) | ||
884 | md_wakeup_thread(cinfo->recv_thread); | ||
885 | } | ||
886 | |||
870 | static void resync_bitmap(struct mddev *mddev) | 887 | static void resync_bitmap(struct mddev *mddev) |
871 | { | 888 | { |
872 | struct md_cluster_info *cinfo = mddev->cluster_info; | 889 | struct md_cluster_info *cinfo = mddev->cluster_info; |
@@ -1208,6 +1225,7 @@ static struct md_cluster_operations cluster_ops = { | |||
1208 | .add_new_disk_cancel = add_new_disk_cancel, | 1225 | .add_new_disk_cancel = add_new_disk_cancel, |
1209 | .new_disk_ack = new_disk_ack, | 1226 | .new_disk_ack = new_disk_ack, |
1210 | .remove_disk = remove_disk, | 1227 | .remove_disk = remove_disk, |
1228 | .load_bitmaps = load_bitmaps, | ||
1211 | .gather_bitmaps = gather_bitmaps, | 1229 | .gather_bitmaps = gather_bitmaps, |
1212 | .lock_all_bitmaps = lock_all_bitmaps, | 1230 | .lock_all_bitmaps = lock_all_bitmaps, |
1213 | .unlock_all_bitmaps = unlock_all_bitmaps, | 1231 | .unlock_all_bitmaps = unlock_all_bitmaps, |
diff --git a/drivers/md/md-cluster.h b/drivers/md/md-cluster.h index 45ce6c97d8bd..e765499ba591 100644 --- a/drivers/md/md-cluster.h +++ b/drivers/md/md-cluster.h | |||
@@ -23,6 +23,7 @@ struct md_cluster_operations { | |||
23 | void (*add_new_disk_cancel)(struct mddev *mddev); | 23 | void (*add_new_disk_cancel)(struct mddev *mddev); |
24 | int (*new_disk_ack)(struct mddev *mddev, bool ack); | 24 | int (*new_disk_ack)(struct mddev *mddev, bool ack); |
25 | int (*remove_disk)(struct mddev *mddev, struct md_rdev *rdev); | 25 | int (*remove_disk)(struct mddev *mddev, struct md_rdev *rdev); |
26 | void (*load_bitmaps)(struct mddev *mddev, int total_slots); | ||
26 | int (*gather_bitmaps)(struct md_rdev *rdev); | 27 | int (*gather_bitmaps)(struct md_rdev *rdev); |
27 | int (*lock_all_bitmaps)(struct mddev *mddev); | 28 | int (*lock_all_bitmaps)(struct mddev *mddev); |
28 | void (*unlock_all_bitmaps)(struct mddev *mddev); | 29 | void (*unlock_all_bitmaps)(struct mddev *mddev); |