aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/md-cluster.c
diff options
context:
space:
mode:
author Guoqing Jiang <gqjiang@suse.com> 2016-05-02 11:33:12 -0400
committer Shaohua Li <shli@fb.com> 2016-05-04 15:39:35 -0400
commit 1535212c542285e430d44a75bfc0a99df610f598 (patch)
tree 5d387e9c80a353fc7fc28656ecc24243c0b51b1a /drivers/md/md-cluster.c
parent 5b0fb33e8aa1c8a94f763d1025445146412ca766 (diff)
md-cluster: fix locking when node joins cluster during message broadcast
If a node joins the cluster while a message broadcast is under way, a locking issue can occur as follows. Consider a cluster with two nodes: node A has called __sendmsg but has not yet up-converted its lock on ack from CR to EX, and node B has already released its CR lock on ack. If a new node C joins the cluster at this point, it has not received the message that A sent, so it could take a CR lock on ack before A up-converts CR to EX on ack. Therefore a node joining the cluster should first get an EX lock on "token" to ensure that no broadcast is ongoing, and release it only after it holds CR on ack. Reviewed-by: NeilBrown <neilb@suse.com> Signed-off-by: Guoqing Jiang <gqjiang@suse.com> Signed-off-by: Shaohua Li <shli@fb.com>
Diffstat (limited to 'drivers/md/md-cluster.c')
-rw-r--r--drivers/md/md-cluster.c13
1 files changed, 10 insertions, 3 deletions
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 76f88f731aa1..30f1160142c1 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -781,17 +781,24 @@ static int join(struct mddev *mddev, int nodes)
781 cinfo->token_lockres = lockres_init(mddev, "token", NULL, 0); 781 cinfo->token_lockres = lockres_init(mddev, "token", NULL, 0);
782 if (!cinfo->token_lockres) 782 if (!cinfo->token_lockres)
783 goto err; 783 goto err;
784 cinfo->ack_lockres = lockres_init(mddev, "ack", ack_bast, 0);
785 if (!cinfo->ack_lockres)
786 goto err;
787 cinfo->no_new_dev_lockres = lockres_init(mddev, "no-new-dev", NULL, 0); 784 cinfo->no_new_dev_lockres = lockres_init(mddev, "no-new-dev", NULL, 0);
788 if (!cinfo->no_new_dev_lockres) 785 if (!cinfo->no_new_dev_lockres)
789 goto err; 786 goto err;
790 787
788 ret = dlm_lock_sync(cinfo->token_lockres, DLM_LOCK_EX);
789 if (ret) {
790 ret = -EAGAIN;
791 pr_err("md-cluster: can't join cluster to avoid lock issue\n");
792 goto err;
793 }
794 cinfo->ack_lockres = lockres_init(mddev, "ack", ack_bast, 0);
795 if (!cinfo->ack_lockres)
796 goto err;
791 /* get sync CR lock on ACK. */ 797 /* get sync CR lock on ACK. */
792 if (dlm_lock_sync(cinfo->ack_lockres, DLM_LOCK_CR)) 798 if (dlm_lock_sync(cinfo->ack_lockres, DLM_LOCK_CR))
793 pr_err("md-cluster: failed to get a sync CR lock on ACK!(%d)\n", 799 pr_err("md-cluster: failed to get a sync CR lock on ACK!(%d)\n",
794 ret); 800 ret);
801 dlm_unlock_sync(cinfo->token_lockres);
795 /* get sync CR lock on no-new-dev. */ 802 /* get sync CR lock on no-new-dev. */
796 if (dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR)) 803 if (dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR))
797 pr_err("md-cluster: failed to get a sync CR lock on no-new-dev!(%d)\n", ret); 804 pr_err("md-cluster: failed to get a sync CR lock on no-new-dev!(%d)\n", ret);