diff options
author | David Teigland <teigland@redhat.com> | 2007-09-27 16:53:38 -0400 |
---|---|---|
committer | Steven Whitehouse <swhiteho@redhat.com> | 2007-10-10 03:56:38 -0400 |
commit | c36258b5925e6cf6bf72904635100593573bfcff (patch) | |
tree | 565f1ce29a7f8a2cd1c25f2d36c932727adbdbc2 /fs/dlm/recoverd.c | |
parent | b434eda6fda5bcdcc2dd918e5ffbf7184f2d4e17 (diff) |
[DLM] block dlm_recv in recovery transition
Introduce a per-lockspace rwsem that's held in read mode by dlm_recv
threads while working in the dlm. This allows dlm_recv activity to be
suspended when the lockspace transitions to, from and between recovery
cycles.
The specific bug prompting this change is one where an in-progress
recovery cycle is aborted by a new recovery cycle. While dlm_recv was
processing a recovery message, the recovery cycle was aborted and
dlm_recoverd began cleaning up. dlm_recv decremented recover_locks_count
on an rsb after dlm_recoverd had reset it to zero. This is fixed by
suspending dlm_recv (taking write lock on the rwsem) before aborting the
current recovery.
The transitions to/from normal and recovery modes are simplified by using
this new ability to block dlm_recv. The switch from normal to recovery
mode means dlm_recv goes from processing locking messages, to saving them
for later, and vice versa. Races are avoided by blocking dlm_recv when
setting the flag that switches between modes.
Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Diffstat (limited to 'fs/dlm/recoverd.c')
-rw-r--r-- | fs/dlm/recoverd.c | 11 |
1 files changed, 10 insertions, 1 deletions
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c index 66575997861c..4b89e20eebe7 100644 --- a/fs/dlm/recoverd.c +++ b/fs/dlm/recoverd.c | |||
@@ -24,19 +24,28 @@ | |||
24 | 24 | ||
25 | 25 | ||
26 | /* If the start for which we're re-enabling locking (seq) has been superseded | 26 | /* If the start for which we're re-enabling locking (seq) has been superseded |
27 | by a newer stop (ls_recover_seq), we need to leave locking disabled. */ | 27 | by a newer stop (ls_recover_seq), we need to leave locking disabled. |
28 | |||
29 | We suspend dlm_recv threads here to avoid the race where dlm_recv a) sees | ||
30 | locking stopped and b) adds a message to the requestqueue, but dlm_recoverd | ||
31 | enables locking and clears the requestqueue between a and b. */ | ||
28 | 32 | ||
29 | static int enable_locking(struct dlm_ls *ls, uint64_t seq) | 33 | static int enable_locking(struct dlm_ls *ls, uint64_t seq) |
30 | { | 34 | { |
31 | int error = -EINTR; | 35 | int error = -EINTR; |
32 | 36 | ||
37 | down_write(&ls->ls_recv_active); | ||
38 | |||
33 | spin_lock(&ls->ls_recover_lock); | 39 | spin_lock(&ls->ls_recover_lock); |
34 | if (ls->ls_recover_seq == seq) { | 40 | if (ls->ls_recover_seq == seq) { |
35 | set_bit(LSFL_RUNNING, &ls->ls_flags); | 41 | set_bit(LSFL_RUNNING, &ls->ls_flags); |
42 | /* unblocks processes waiting to enter the dlm */ | ||
36 | up_write(&ls->ls_in_recovery); | 43 | up_write(&ls->ls_in_recovery); |
37 | error = 0; | 44 | error = 0; |
38 | } | 45 | } |
39 | spin_unlock(&ls->ls_recover_lock); | 46 | spin_unlock(&ls->ls_recover_lock); |
47 | |||
48 | up_write(&ls->ls_recv_active); | ||
40 | return error; | 49 | return error; |
41 | } | 50 | } |
42 | 51 | ||