diff options
author | David Teigland <teigland@redhat.com> | 2012-04-26 16:54:29 -0400 |
---|---|---|
committer | David Teigland <teigland@redhat.com> | 2012-05-02 15:15:27 -0400 |
commit | 4875647a08e35f77274838d97ca8fa44158d50e2 (patch) | |
tree | bf8a39eaf3219af5d661ed3e347545306fd84bda /fs/dlm/requestqueue.c | |
parent | 6d40c4a708e0e996fd9c60d4093aebba5fe1f749 (diff) |
dlm: fixes for nodir mode
The "nodir" mode (statically assign master nodes instead
of using the resource directory) has always been highly
experimental, and never seriously used. This commit
fixes a number of problems, making nodir much more usable.
- Major change to recovery: recover all locks and restart
all in-progress operations after recovery. In some
cases it's not possible to know which in-progress locks
to recover, so recover all. (Most require recovery
in nodir mode anyway since rehashing changes most
master nodes.)
- Change the way nodir mode is enabled, from a command
line mount arg passed through gfs2, into a sysfs
file managed by dlm_controld, consistent with the
other config settings.
- Allow recovering MSTCPY locks on an rsb that has not
yet been turned into a master copy.
- Ignore RCOM_LOCK and RCOM_LOCK_REPLY recovery messages
from a previous, aborted recovery cycle. Base this
on the local recovery status not being in the state
where any nodes should be sending LOCK messages for the
current recovery cycle.
- Hold rsb lock around dlm_purge_mstcpy_locks() because it
may run concurrently with dlm_recover_master_copy().
- Maintain highbast on process-copy lkb's (in addition to
the master as is usual), because the lkb can switch
back and forth between being a master and being a
process copy as the master node changes in recovery.
- When recovering MSTCPY locks, flag rsb's that have
non-empty convert or waiting queues for granting
at the end of recovery. (Rename flag from LOCKS_PURGED
to RECOVER_GRANT and similar for the recovery function,
because it's not only resources with purged locks
that need a grant attempt.)
- Replace a couple of unnecessary assertion panics with
error messages.
Signed-off-by: David Teigland <teigland@redhat.com>
Diffstat (limited to 'fs/dlm/requestqueue.c')
-rw-r--r-- | fs/dlm/requestqueue.c | 39 |
1 files changed, 10 insertions, 29 deletions
diff --git a/fs/dlm/requestqueue.c b/fs/dlm/requestqueue.c index d3191bf03a68..1695f1b0dd45 100644 --- a/fs/dlm/requestqueue.c +++ b/fs/dlm/requestqueue.c | |||
@@ -65,6 +65,7 @@ void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_message *ms) | |||
65 | int dlm_process_requestqueue(struct dlm_ls *ls) | 65 | int dlm_process_requestqueue(struct dlm_ls *ls) |
66 | { | 66 | { |
67 | struct rq_entry *e; | 67 | struct rq_entry *e; |
68 | struct dlm_message *ms; | ||
68 | int error = 0; | 69 | int error = 0; |
69 | 70 | ||
70 | mutex_lock(&ls->ls_requestqueue_mutex); | 71 | mutex_lock(&ls->ls_requestqueue_mutex); |
@@ -78,6 +79,14 @@ int dlm_process_requestqueue(struct dlm_ls *ls) | |||
78 | e = list_entry(ls->ls_requestqueue.next, struct rq_entry, list); | 79 | e = list_entry(ls->ls_requestqueue.next, struct rq_entry, list); |
79 | mutex_unlock(&ls->ls_requestqueue_mutex); | 80 | mutex_unlock(&ls->ls_requestqueue_mutex); |
80 | 81 | ||
82 | ms = &e->request; | ||
83 | |||
84 | log_limit(ls, "dlm_process_requestqueue msg %d from %d " | ||
85 | "lkid %x remid %x result %d seq %u", | ||
86 | ms->m_type, ms->m_header.h_nodeid, | ||
87 | ms->m_lkid, ms->m_remid, ms->m_result, | ||
88 | e->recover_seq); | ||
89 | |||
81 | dlm_receive_message_saved(ls, &e->request, e->recover_seq); | 90 | dlm_receive_message_saved(ls, &e->request, e->recover_seq); |
82 | 91 | ||
83 | mutex_lock(&ls->ls_requestqueue_mutex); | 92 | mutex_lock(&ls->ls_requestqueue_mutex); |
@@ -140,35 +149,7 @@ static int purge_request(struct dlm_ls *ls, struct dlm_message *ms, int nodeid) | |||
140 | if (!dlm_no_directory(ls)) | 149 | if (!dlm_no_directory(ls)) |
141 | return 0; | 150 | return 0; |
142 | 151 | ||
143 | /* with no directory, the master is likely to change as a part of | 152 | return 1; |
144 | recovery; requests to/from the defunct master need to be purged */ | ||
145 | |||
146 | switch (type) { | ||
147 | case DLM_MSG_REQUEST: | ||
148 | case DLM_MSG_CONVERT: | ||
149 | case DLM_MSG_UNLOCK: | ||
150 | case DLM_MSG_CANCEL: | ||
151 | /* we're no longer the master of this resource, the sender | ||
152 | will resend to the new master (see waiter_needs_recovery) */ | ||
153 | |||
154 | if (dlm_hash2nodeid(ls, ms->m_hash) != dlm_our_nodeid()) | ||
155 | return 1; | ||
156 | break; | ||
157 | |||
158 | case DLM_MSG_REQUEST_REPLY: | ||
159 | case DLM_MSG_CONVERT_REPLY: | ||
160 | case DLM_MSG_UNLOCK_REPLY: | ||
161 | case DLM_MSG_CANCEL_REPLY: | ||
162 | case DLM_MSG_GRANT: | ||
163 | /* this reply is from the former master of the resource, | ||
164 | we'll resend to the new master if needed */ | ||
165 | |||
166 | if (dlm_hash2nodeid(ls, ms->m_hash) != nodeid) | ||
167 | return 1; | ||
168 | break; | ||
169 | } | ||
170 | |||
171 | return 0; | ||
172 | } | 153 | } |
173 | 154 | ||
174 | void dlm_purge_requestqueue(struct dlm_ls *ls) | 155 | void dlm_purge_requestqueue(struct dlm_ls *ls) |