diff options
author | Guoqing Jiang <gqjiang@suse.com> | 2016-05-02 11:33:12 -0400 |
---|---|---|
committer | Shaohua Li <shli@fb.com> | 2016-05-04 15:39:35 -0400 |
commit | 1535212c542285e430d44a75bfc0a99df610f598 (patch) | |
tree | 5d387e9c80a353fc7fc28656ecc24243c0b51b1a /drivers/md/md-cluster.c | |
parent | 5b0fb33e8aa1c8a94f763d1025445146412ca766 (diff) |
md-cluster: fix locking when node joins cluster during message broadcast
If a node joins the cluster while a message broadcast
is under way, a lock issue could happen as follows.
Consider a cluster with two nodes: node A is in
__sendmsg and has not yet up-converted CR to EX on ack,
and node B has released CR on ack. If a new node C
joins the cluster at this point, it has not received the
message which A sent before, yet it could hold CR on ack
before A up-converts CR to EX on ack.
So a node joining the cluster should first get an EX lock
on the "token" to ensure that no broadcast is ongoing, and
then release it after it holds CR on ack.
Reviewed-by: NeilBrown <neilb@suse.com>
Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
Diffstat (limited to 'drivers/md/md-cluster.c')
-rw-r--r-- | drivers/md/md-cluster.c | 13 |
1 files changed, 10 insertions, 3 deletions
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index 76f88f731aa1..30f1160142c1 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c | |||
@@ -781,17 +781,24 @@ static int join(struct mddev *mddev, int nodes) | |||
781 | cinfo->token_lockres = lockres_init(mddev, "token", NULL, 0); | 781 | cinfo->token_lockres = lockres_init(mddev, "token", NULL, 0); |
782 | if (!cinfo->token_lockres) | 782 | if (!cinfo->token_lockres) |
783 | goto err; | 783 | goto err; |
784 | cinfo->ack_lockres = lockres_init(mddev, "ack", ack_bast, 0); | ||
785 | if (!cinfo->ack_lockres) | ||
786 | goto err; | ||
787 | cinfo->no_new_dev_lockres = lockres_init(mddev, "no-new-dev", NULL, 0); | 784 | cinfo->no_new_dev_lockres = lockres_init(mddev, "no-new-dev", NULL, 0); |
788 | if (!cinfo->no_new_dev_lockres) | 785 | if (!cinfo->no_new_dev_lockres) |
789 | goto err; | 786 | goto err; |
790 | 787 | ||
788 | ret = dlm_lock_sync(cinfo->token_lockres, DLM_LOCK_EX); | ||
789 | if (ret) { | ||
790 | ret = -EAGAIN; | ||
791 | pr_err("md-cluster: can't join cluster to avoid lock issue\n"); | ||
792 | goto err; | ||
793 | } | ||
794 | cinfo->ack_lockres = lockres_init(mddev, "ack", ack_bast, 0); | ||
795 | if (!cinfo->ack_lockres) | ||
796 | goto err; | ||
791 | /* get sync CR lock on ACK. */ | 797 | /* get sync CR lock on ACK. */ |
792 | if (dlm_lock_sync(cinfo->ack_lockres, DLM_LOCK_CR)) | 798 | if (dlm_lock_sync(cinfo->ack_lockres, DLM_LOCK_CR)) |
793 | pr_err("md-cluster: failed to get a sync CR lock on ACK!(%d)\n", | 799 | pr_err("md-cluster: failed to get a sync CR lock on ACK!(%d)\n", |
794 | ret); | 800 | ret); |
801 | dlm_unlock_sync(cinfo->token_lockres); | ||
795 | /* get sync CR lock on no-new-dev. */ | 802 | /* get sync CR lock on no-new-dev. */ |
796 | if (dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR)) | 803 | if (dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR)) |
797 | pr_err("md-cluster: failed to get a sync CR lock on no-new-dev!(%d)\n", ret); | 804 | pr_err("md-cluster: failed to get a sync CR lock on no-new-dev!(%d)\n", ret); |