aboutsummaryrefslogtreecommitdiffstats
path: root/fs/dlm/requestqueue.c
diff options
context:
space:
mode:
author David Teigland <teigland@redhat.com> 2012-04-26 16:54:29 -0400
committer David Teigland <teigland@redhat.com> 2012-05-02 15:15:27 -0400
commit 4875647a08e35f77274838d97ca8fa44158d50e2 (patch)
tree bf8a39eaf3219af5d661ed3e347545306fd84bda /fs/dlm/requestqueue.c
parent 6d40c4a708e0e996fd9c60d4093aebba5fe1f749 (diff)
dlm: fixes for nodir mode
The "nodir" mode (statically assign master nodes instead of using the resource directory) has always been highly experimental, and never seriously used. This commit fixes a number of problems, making nodir much more usable. - Major change to recovery: recover all locks and restart all in-progress operations after recovery. In some cases it's not possible to know which in-progress locks to recover, so recover all. (Most require recovery in nodir mode anyway since rehashing changes most master nodes.) - Change the way nodir mode is enabled, from a command line mount arg passed through gfs2, into a sysfs file managed by dlm_controld, consistent with the other config settings. - Allow recovering MSTCPY locks on an rsb that has not yet been turned into a master copy. - Ignore RCOM_LOCK and RCOM_LOCK_REPLY recovery messages from a previous, aborted recovery cycle. Base this on the local recovery status not being in the state where any nodes should be sending LOCK messages for the current recovery cycle. - Hold rsb lock around dlm_purge_mstcpy_locks() because it may run concurrently with dlm_recover_master_copy(). - Maintain highbast on process-copy lkb's (in addition to the master as is usual), because the lkb can switch back and forth between being a master and being a process copy as the master node changes in recovery. - When recovering MSTCPY locks, flag rsb's that have non-empty convert or waiting queues for granting at the end of recovery. (Rename flag from LOCKS_PURGED to RECOVER_GRANT and similar for the recovery function, because it's not only resources with purged locks that need a grant attempt.) - Replace a couple of unnecessary assertion panics with error messages. Signed-off-by: David Teigland <teigland@redhat.com>
Diffstat (limited to 'fs/dlm/requestqueue.c')
-rw-r--r--fs/dlm/requestqueue.c39
1 files changed, 10 insertions, 29 deletions
diff --git a/fs/dlm/requestqueue.c b/fs/dlm/requestqueue.c
index d3191bf03a68..1695f1b0dd45 100644
--- a/fs/dlm/requestqueue.c
+++ b/fs/dlm/requestqueue.c
@@ -65,6 +65,7 @@ void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_message *ms)
65int dlm_process_requestqueue(struct dlm_ls *ls) 65int dlm_process_requestqueue(struct dlm_ls *ls)
66{ 66{
67 struct rq_entry *e; 67 struct rq_entry *e;
68 struct dlm_message *ms;
68 int error = 0; 69 int error = 0;
69 70
70 mutex_lock(&ls->ls_requestqueue_mutex); 71 mutex_lock(&ls->ls_requestqueue_mutex);
@@ -78,6 +79,14 @@ int dlm_process_requestqueue(struct dlm_ls *ls)
78 e = list_entry(ls->ls_requestqueue.next, struct rq_entry, list); 79 e = list_entry(ls->ls_requestqueue.next, struct rq_entry, list);
79 mutex_unlock(&ls->ls_requestqueue_mutex); 80 mutex_unlock(&ls->ls_requestqueue_mutex);
80 81
82 ms = &e->request;
83
84 log_limit(ls, "dlm_process_requestqueue msg %d from %d "
85 "lkid %x remid %x result %d seq %u",
86 ms->m_type, ms->m_header.h_nodeid,
87 ms->m_lkid, ms->m_remid, ms->m_result,
88 e->recover_seq);
89
81 dlm_receive_message_saved(ls, &e->request, e->recover_seq); 90 dlm_receive_message_saved(ls, &e->request, e->recover_seq);
82 91
83 mutex_lock(&ls->ls_requestqueue_mutex); 92 mutex_lock(&ls->ls_requestqueue_mutex);
@@ -140,35 +149,7 @@ static int purge_request(struct dlm_ls *ls, struct dlm_message *ms, int nodeid)
140 if (!dlm_no_directory(ls)) 149 if (!dlm_no_directory(ls))
141 return 0; 150 return 0;
142 151
143 /* with no directory, the master is likely to change as a part of 152 return 1;
144 recovery; requests to/from the defunct master need to be purged */
145
146 switch (type) {
147 case DLM_MSG_REQUEST:
148 case DLM_MSG_CONVERT:
149 case DLM_MSG_UNLOCK:
150 case DLM_MSG_CANCEL:
151 /* we're no longer the master of this resource, the sender
152 will resend to the new master (see waiter_needs_recovery) */
153
154 if (dlm_hash2nodeid(ls, ms->m_hash) != dlm_our_nodeid())
155 return 1;
156 break;
157
158 case DLM_MSG_REQUEST_REPLY:
159 case DLM_MSG_CONVERT_REPLY:
160 case DLM_MSG_UNLOCK_REPLY:
161 case DLM_MSG_CANCEL_REPLY:
162 case DLM_MSG_GRANT:
163 /* this reply is from the former master of the resource,
164 we'll resend to the new master if needed */
165
166 if (dlm_hash2nodeid(ls, ms->m_hash) != nodeid)
167 return 1;
168 break;
169 }
170
171 return 0;
172} 153}
173 154
174void dlm_purge_requestqueue(struct dlm_ls *ls) 155void dlm_purge_requestqueue(struct dlm_ls *ls)