diff options
author | David Teigland <teigland@redhat.com> | 2007-05-29 09:44:23 -0400 |
---|---|---|
committer | Steven Whitehouse <swhiteho@redhat.com> | 2007-07-09 03:22:52 -0400 |
commit | 84d8cd69a8e7f1c9962f46bc79850c9f1f663806 (patch) | |
tree | 4c0cedc62e7b94ba2aaa1ea9faeb7de074e08e08 /fs | |
parent | b3cab7b9a34a6e65c1ca8f80fb57b256d57e8555 (diff) |
[DLM] timeout fixes
Various fixes related to the new timeout feature:
- add_timeout() missed setting the WATCH_TIMEWARN flag on lkb's when
the TIMEOUT flag was already set
- dlm_clear_proc_locks() should remove a dead process's locks from
the timeout list
- the end-of-life calculation for user locks needs to consider
that -ETIMEDOUT is equivalent to -DLM_ECANCEL
- make initial default timewarn_cs config value visible in configfs
- change bit position of TIMEOUT_CANCEL flag so it's not copied to
a remote master node
- set timestamp on remote lkb's so a lock dump will display the time
they've been waiting
Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/dlm/config.c | 1 | ||||
-rw-r--r-- | fs/dlm/dlm_internal.h | 2 | ||||
-rw-r--r-- | fs/dlm/lock.c | 13 | ||||
-rw-r--r-- | fs/dlm/netlink.c | 2 | ||||
-rw-r--r-- | fs/dlm/user.c | 49 |
5 files changed, 39 insertions, 28 deletions
diff --git a/fs/dlm/config.c b/fs/dlm/config.c index 2909abf1bbc3..1b59fa56a599 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c | |||
@@ -433,6 +433,7 @@ static struct config_group *make_cluster(struct config_group *g, | |||
433 | cl->cl_toss_secs = dlm_config.ci_toss_secs; | 433 | cl->cl_toss_secs = dlm_config.ci_toss_secs; |
434 | cl->cl_scan_secs = dlm_config.ci_scan_secs; | 434 | cl->cl_scan_secs = dlm_config.ci_scan_secs; |
435 | cl->cl_log_debug = dlm_config.ci_log_debug; | 435 | cl->cl_log_debug = dlm_config.ci_log_debug; |
436 | cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs; | ||
436 | 437 | ||
437 | space_list = &sps->ss_group; | 438 | space_list = &sps->ss_group; |
438 | comm_list = &cms->cs_group; | 439 | comm_list = &cms->cs_group; |
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index 03ba6c4fd5c2..a7435a8df35e 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h | |||
@@ -215,9 +215,9 @@ struct dlm_args { | |||
215 | #define DLM_IFL_OVERLAP_CANCEL 0x00100000 | 215 | #define DLM_IFL_OVERLAP_CANCEL 0x00100000 |
216 | #define DLM_IFL_ENDOFLIFE 0x00200000 | 216 | #define DLM_IFL_ENDOFLIFE 0x00200000 |
217 | #define DLM_IFL_WATCH_TIMEWARN 0x00400000 | 217 | #define DLM_IFL_WATCH_TIMEWARN 0x00400000 |
218 | #define DLM_IFL_TIMEOUT_CANCEL 0x00800000 | ||
218 | #define DLM_IFL_USER 0x00000001 | 219 | #define DLM_IFL_USER 0x00000001 |
219 | #define DLM_IFL_ORPHAN 0x00000002 | 220 | #define DLM_IFL_ORPHAN 0x00000002 |
220 | #define DLM_IFL_TIMEOUT_CANCEL 0x00000004 | ||
221 | 221 | ||
222 | struct dlm_lkb { | 222 | struct dlm_lkb { |
223 | struct dlm_rsb *lkb_resource; /* the rsb */ | 223 | struct dlm_rsb *lkb_resource; /* the rsb */ |
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 2f8a5a700cc0..df91578145d1 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c | |||
@@ -1010,17 +1010,18 @@ static void add_timeout(struct dlm_lkb *lkb) | |||
1010 | { | 1010 | { |
1011 | struct dlm_ls *ls = lkb->lkb_resource->res_ls; | 1011 | struct dlm_ls *ls = lkb->lkb_resource->res_ls; |
1012 | 1012 | ||
1013 | if (is_master_copy(lkb)) | 1013 | if (is_master_copy(lkb)) { |
1014 | lkb->lkb_timestamp = jiffies; | ||
1014 | return; | 1015 | return; |
1015 | 1016 | } | |
1016 | if (lkb->lkb_exflags & DLM_LKF_TIMEOUT) | ||
1017 | goto add_it; | ||
1018 | 1017 | ||
1019 | if (test_bit(LSFL_TIMEWARN, &ls->ls_flags) && | 1018 | if (test_bit(LSFL_TIMEWARN, &ls->ls_flags) && |
1020 | !(lkb->lkb_exflags & DLM_LKF_NODLCKWT)) { | 1019 | !(lkb->lkb_exflags & DLM_LKF_NODLCKWT)) { |
1021 | lkb->lkb_flags |= DLM_IFL_WATCH_TIMEWARN; | 1020 | lkb->lkb_flags |= DLM_IFL_WATCH_TIMEWARN; |
1022 | goto add_it; | 1021 | goto add_it; |
1023 | } | 1022 | } |
1023 | if (lkb->lkb_exflags & DLM_LKF_TIMEOUT) | ||
1024 | goto add_it; | ||
1024 | return; | 1025 | return; |
1025 | 1026 | ||
1026 | add_it: | 1027 | add_it: |
@@ -3510,8 +3511,7 @@ static void _receive_cancel_reply(struct dlm_lkb *lkb, struct dlm_message *ms) | |||
3510 | case -DLM_ECANCEL: | 3511 | case -DLM_ECANCEL: |
3511 | receive_flags_reply(lkb, ms); | 3512 | receive_flags_reply(lkb, ms); |
3512 | revert_lock_pc(r, lkb); | 3513 | revert_lock_pc(r, lkb); |
3513 | if (ms->m_result) | 3514 | queue_cast(r, lkb, -DLM_ECANCEL); |
3514 | queue_cast(r, lkb, -DLM_ECANCEL); | ||
3515 | break; | 3515 | break; |
3516 | case 0: | 3516 | case 0: |
3517 | break; | 3517 | break; |
@@ -4534,6 +4534,7 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc) | |||
4534 | lkb = del_proc_lock(ls, proc); | 4534 | lkb = del_proc_lock(ls, proc); |
4535 | if (!lkb) | 4535 | if (!lkb) |
4536 | break; | 4536 | break; |
4537 | del_timeout(lkb); | ||
4537 | if (lkb->lkb_exflags & DLM_LKF_PERSISTENT) | 4538 | if (lkb->lkb_exflags & DLM_LKF_PERSISTENT) |
4538 | orphan_proc_lock(ls, lkb); | 4539 | orphan_proc_lock(ls, lkb); |
4539 | else | 4540 | else |
diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c index 804b32cd22c1..863b87d0dc71 100644 --- a/fs/dlm/netlink.c +++ b/fs/dlm/netlink.c | |||
@@ -133,8 +133,6 @@ void dlm_timeout_warn(struct dlm_lkb *lkb) | |||
133 | size_t size; | 133 | size_t size; |
134 | int rv; | 134 | int rv; |
135 | 135 | ||
136 | log_debug(lkb->lkb_resource->res_ls, "timeout_warn %x", lkb->lkb_id); | ||
137 | |||
138 | size = nla_total_size(sizeof(struct dlm_lock_data)) + | 136 | size = nla_total_size(sizeof(struct dlm_lock_data)) + |
139 | nla_total_size(0); /* why this? */ | 137 | nla_total_size(0); /* why this? */ |
140 | 138 | ||
diff --git a/fs/dlm/user.c b/fs/dlm/user.c index 37aad3fe8949..329da1b5285f 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c | |||
@@ -138,6 +138,35 @@ static void compat_output(struct dlm_lock_result *res, | |||
138 | } | 138 | } |
139 | #endif | 139 | #endif |
140 | 140 | ||
141 | /* Figure out if this lock is at the end of its life and no longer | ||
142 | available for the application to use. The lkb still exists until | ||
143 | the final ast is read. A lock becomes EOL in three situations: | ||
144 | 1. a noqueue request fails with EAGAIN | ||
145 | 2. an unlock completes with EUNLOCK | ||
146 | 3. a cancel of a waiting request completes with ECANCEL/EDEADLK | ||
147 | An EOL lock needs to be removed from the process's list of locks. | ||
148 | And we can't allow any new operation on an EOL lock. This is | ||
149 | not related to the lifetime of the lkb struct which is managed | ||
150 | entirely by refcount. */ | ||
151 | |||
152 | static int lkb_is_endoflife(struct dlm_lkb *lkb, int sb_status, int type) | ||
153 | { | ||
154 | switch (sb_status) { | ||
155 | case -DLM_EUNLOCK: | ||
156 | return 1; | ||
157 | case -DLM_ECANCEL: | ||
158 | case -ETIMEDOUT: | ||
159 | if (lkb->lkb_grmode == DLM_LOCK_IV) | ||
160 | return 1; | ||
161 | break; | ||
162 | case -EAGAIN: | ||
163 | if (type == AST_COMP && lkb->lkb_grmode == DLM_LOCK_IV) | ||
164 | return 1; | ||
165 | break; | ||
166 | } | ||
167 | return 0; | ||
168 | } | ||
169 | |||
141 | /* we could possibly check if the cancel of an orphan has resulted in the lkb | 170 | /* we could possibly check if the cancel of an orphan has resulted in the lkb |
142 | being removed and then remove that lkb from the orphans list and free it */ | 171 | being removed and then remove that lkb from the orphans list and free it */ |
143 | 172 | ||
@@ -184,25 +213,7 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type) | |||
184 | log_debug(ls, "ast overlap %x status %x %x", | 213 | log_debug(ls, "ast overlap %x status %x %x", |
185 | lkb->lkb_id, ua->lksb.sb_status, lkb->lkb_flags); | 214 | lkb->lkb_id, ua->lksb.sb_status, lkb->lkb_flags); |
186 | 215 | ||
187 | /* Figure out if this lock is at the end of its life and no longer | 216 | eol = lkb_is_endoflife(lkb, ua->lksb.sb_status, type); |
188 | available for the application to use. The lkb still exists until | ||
189 | the final ast is read. A lock becomes EOL in three situations: | ||
190 | 1. a noqueue request fails with EAGAIN | ||
191 | 2. an unlock completes with EUNLOCK | ||
192 | 3. a cancel of a waiting request completes with ECANCEL | ||
193 | An EOL lock needs to be removed from the process's list of locks. | ||
194 | And we can't allow any new operation on an EOL lock. This is | ||
195 | not related to the lifetime of the lkb struct which is managed | ||
196 | entirely by refcount. */ | ||
197 | |||
198 | if (type == AST_COMP && | ||
199 | lkb->lkb_grmode == DLM_LOCK_IV && | ||
200 | ua->lksb.sb_status == -EAGAIN) | ||
201 | eol = 1; | ||
202 | else if (ua->lksb.sb_status == -DLM_EUNLOCK || | ||
203 | (ua->lksb.sb_status == -DLM_ECANCEL && | ||
204 | lkb->lkb_grmode == DLM_LOCK_IV)) | ||
205 | eol = 1; | ||
206 | if (eol) { | 217 | if (eol) { |
207 | lkb->lkb_ast_type &= ~AST_BAST; | 218 | lkb->lkb_ast_type &= ~AST_BAST; |
208 | lkb->lkb_flags |= DLM_IFL_ENDOFLIFE; | 219 | lkb->lkb_flags |= DLM_IFL_ENDOFLIFE; |