aboutsummaryrefslogtreecommitdiffstats
path: root/fs/dlm/member.c
diff options
context:
space:
mode:
author: David Teigland <teigland@redhat.com> 2007-09-27 16:53:38 -0400
committer: Steven Whitehouse <swhiteho@redhat.com> 2007-10-10 03:56:38 -0400
commit c36258b5925e6cf6bf72904635100593573bfcff (patch)
tree 565f1ce29a7f8a2cd1c25f2d36c932727adbdbc2 /fs/dlm/member.c
parent b434eda6fda5bcdcc2dd918e5ffbf7184f2d4e17 (diff)
[DLM] block dlm_recv in recovery transition
Introduce a per-lockspace rwsem that's held in read mode by dlm_recv threads while working in the dlm. This allows dlm_recv activity to be suspended when the lockspace transitions to, from and between recovery cycles. The specific bug prompting this change is one where an in-progress recovery cycle is aborted by a new recovery cycle. While dlm_recv was processing a recovery message, the recovery cycle was aborted and dlm_recoverd began cleaning up. dlm_recv decremented recover_locks_count on an rsb after dlm_recoverd had reset it to zero. This is fixed by suspending dlm_recv (taking write lock on the rwsem) before aborting the current recovery. The transitions to/from normal and recovery modes are simplified by using this new ability to block dlm_recv. The switch from normal to recovery mode means dlm_recv goes from processing locking messages, to saving them for later, and vice versa. Races are avoided by blocking dlm_recv when setting the flag that switches between modes. Signed-off-by: David Teigland <teigland@redhat.com> Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Diffstat (limited to 'fs/dlm/member.c')
-rw-r--r-- fs/dlm/member.c | 41
1 file changed, 27 insertions(+), 14 deletions(-)
diff --git a/fs/dlm/member.c b/fs/dlm/member.c
index d09977528f69..e9cdcab306e2 100644
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@@ -18,10 +18,6 @@
18#include "rcom.h" 18#include "rcom.h"
19#include "config.h" 19#include "config.h"
20 20
21/*
22 * Following called by dlm_recoverd thread
23 */
24
25static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new) 21static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new)
26{ 22{
27 struct dlm_member *memb = NULL; 23 struct dlm_member *memb = NULL;
@@ -250,18 +246,30 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
250 return error; 246 return error;
251} 247}
252 248
253/* 249/* Userspace guarantees that dlm_ls_stop() has completed on all nodes before
254 * Following called from lockspace.c 250 dlm_ls_start() is called on any of them to start the new recovery. */
255 */
256 251
257int dlm_ls_stop(struct dlm_ls *ls) 252int dlm_ls_stop(struct dlm_ls *ls)
258{ 253{
259 int new; 254 int new;
260 255
261 /* 256 /*
262 * A stop cancels any recovery that's in progress (see RECOVERY_STOP, 257 * Prevent dlm_recv from being in the middle of something when we do
263 * dlm_recovery_stopped()) and prevents any new locks from being 258 * the stop. This includes ensuring dlm_recv isn't processing a
264 * processed (see RUNNING, dlm_locking_stopped()). 259 * recovery message (rcom), while dlm_recoverd is aborting and
260 * resetting things from an in-progress recovery. i.e. we want
261 * dlm_recoverd to abort its recovery without worrying about dlm_recv
262 * processing an rcom at the same time. Stopping dlm_recv also makes
263 * it easy for dlm_receive_message() to check locking stopped and add a
264 * message to the requestqueue without races.
265 */
266
267 down_write(&ls->ls_recv_active);
268
269 /*
270 * Abort any recovery that's in progress (see RECOVERY_STOP,
271 * dlm_recovery_stopped()) and tell any other threads running in the
272 * dlm to quit any processing (see RUNNING, dlm_locking_stopped()).
265 */ 273 */
266 274
267 spin_lock(&ls->ls_recover_lock); 275 spin_lock(&ls->ls_recover_lock);
@@ -271,8 +279,14 @@ int dlm_ls_stop(struct dlm_ls *ls)
271 spin_unlock(&ls->ls_recover_lock); 279 spin_unlock(&ls->ls_recover_lock);
272 280
273 /* 281 /*
282 * Let dlm_recv run again, now any normal messages will be saved on the
283 * requestqueue for later.
284 */
285
286 up_write(&ls->ls_recv_active);
287
288 /*
274 * This in_recovery lock does two things: 289 * This in_recovery lock does two things:
275 *
276 * 1) Keeps this function from returning until all threads are out 290 * 1) Keeps this function from returning until all threads are out
277 * of locking routines and locking is truely stopped. 291 * of locking routines and locking is truely stopped.
278 * 2) Keeps any new requests from being processed until it's unlocked 292 * 2) Keeps any new requests from being processed until it's unlocked
@@ -284,9 +298,8 @@ int dlm_ls_stop(struct dlm_ls *ls)
284 298
285 /* 299 /*
286 * The recoverd suspend/resume makes sure that dlm_recoverd (if 300 * The recoverd suspend/resume makes sure that dlm_recoverd (if
287 * running) has noticed the clearing of RUNNING above and quit 301 * running) has noticed RECOVERY_STOP above and quit processing the
288 * processing the previous recovery. This will be true for all nodes 302 * previous recovery.
289 * before any nodes start the new recovery.
290 */ 303 */
291 304
292 dlm_recoverd_suspend(ls); 305 dlm_recoverd_suspend(ls);