[DLM] fix requestqueue race

Red Hat BZ 211914. There's a race between dlm_recoverd (1) enabling locking and (2) clearing out the requestqueue, and dlm_recvd (1) checking if locking is enabled and (2) adding a message to the requestqueue. An order of recvd(1), recoverd(1), recoverd(2), recvd(2) will result in a message being left on the requestqueue, because the message is queued after the queue has already been cleared. The fix is to have dlm_recvd check whether dlm_recoverd has enabled locking after taking the mutex for the requestqueue and, if it has, process the message instead of queueing it. Signed-off-by: David Teigland <teigland@redhat.com> Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
author: David Teigland <teigland@redhat.com> 2006-10-31 12:55:56 -0500
committer: Steven Whitehouse <swhiteho@redhat.com> 2006-11-30 10:35:10 -0500
commit: d4400156d415540086c34a06e5d233122d6bf56a (patch)
tree: 747e4d270fb453d57926d6b6cab564664d9d2c0f /fs
parent: 435618b75b82b5ee511cc01fcdda9c44adb2f4bd (diff)
3 files changed, 29 insertions, 9 deletions
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 3f2befa4797b..6088a16926bf 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -3028,10 +3028,17 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
        while (1) {
                if (dlm_locking_stopped(ls)) {
-                        if (!recovery)
+                        if (recovery) {
-                                dlm_add_requestqueue(ls, nodeid, hd);
+                                error = -EINTR;
-                        error = -EINTR;
+                                goto out;
-                        goto out;
+                        }
+                        error = dlm_add_requestqueue(ls, nodeid, hd);
+                        if (error == -EAGAIN)
+                                continue;
+                        else {
+                                error = -EINTR;
+                                goto out;
+                        }
                }
                if (lock_recovery_try(ls))
diff --git a/fs/dlm/requestqueue.c b/fs/dlm/requestqueue.c
index 7b2b089634a2..0226d2a0a0fa 100644
--- a/fs/dlm/requestqueue.c
+++ b/fs/dlm/requestqueue.c
@@ -30,26 +30,39 @@ struct rq_entry {
 * lockspace is enabled on some while still suspended on others.
 */
-void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd)
+int dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd)
 {
        struct rq_entry *e;
        int length = hd->h_length;
+        int rv = 0;
        if (dlm_is_removed(ls, nodeid))
-                return;
+                return 0;
        e = kmalloc(sizeof(struct rq_entry) + length, GFP_KERNEL);
        if (!e) {
                log_print("dlm_add_requestqueue: out of memory\n");
-                return;
+                return 0;
        }
        e->nodeid = nodeid;
        memcpy(e->request, hd, length);
+        /* We need to check dlm_locking_stopped() after taking the mutex to
+           avoid a race where dlm_recoverd enables locking and runs
+           process_requestqueue between our earlier dlm_locking_stopped check
+           and this addition to the requestqueue. */
        mutex_lock(&ls->ls_requestqueue_mutex);
-        list_add_tail(&e->list, &ls->ls_requestqueue);
+        if (dlm_locking_stopped(ls))
+                list_add_tail(&e->list, &ls->ls_requestqueue);
+        else {
+                log_debug(ls, "dlm_add_requestqueue skip from %d", nodeid);
+                kfree(e);
+                rv = -EAGAIN;
+        }
        mutex_unlock(&ls->ls_requestqueue_mutex);
+        return rv;
 }
 int dlm_process_requestqueue(struct dlm_ls *ls)
diff --git a/fs/dlm/requestqueue.h b/fs/dlm/requestqueue.h
index 349f0d292d95..6a53ea03335d 100644
--- a/fs/dlm/requestqueue.h
+++ b/fs/dlm/requestqueue.h
@@ -13,7 +13,7 @@
 #ifndef __REQUESTQUEUE_DOT_H__
 #define __REQUESTQUEUE_DOT_H__
-void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd);
+int dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd);
 int dlm_process_requestqueue(struct dlm_ls *ls);
 void dlm_wait_requestqueue(struct dlm_ls *ls);
 void dlm_purge_requestqueue(struct dlm_ls *ls);
author	David Teigland <teigland@redhat.com>	2006-10-31 12:55:56 -0500
committer	Steven Whitehouse <swhiteho@redhat.com>	2006-11-30 10:35:10 -0500
commit	d4400156d415540086c34a06e5d233122d6bf56a (patch)
tree	747e4d270fb453d57926d6b6cab564664d9d2c0f /fs
parent	435618b75b82b5ee511cc01fcdda9c44adb2f4bd (diff)