diff options
author | Bob Peterson <rpeterso@redhat.com> | 2007-09-14 10:27:59 -0400 |
---|---|---|
committer | Steven Whitehouse <swhiteho@redhat.com> | 2007-10-10 03:56:22 -0400 |
commit | 55c0c4ac0be144014651b19e77c9b77f367955de (patch) | |
tree | dbea7f24657f8e99f4ff94519cb4f24373930320 /fs | |
parent | d66f8277f53407754f50ae6bada68f1b68d04d48 (diff) |
[GFS2] GFS2: chmod hung - fix race in thread creation
The problem boiled down to a race between gdlm_init_threads() assigning
ls->thread1 and the newly started thread deciding whether to set blist = 1.
Essentially, "if (current == ls->thread1)" was checked by the thread
before the thread creator set ls->thread1.
Since thread1 is the only thread that is allowed to work on the
blocking queue, and since neither thread thought it was thread1, no one
was working on the queue. So everything just sat.
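This patch closes the race by passing blist into gdlm_thread() as an argument from separate gdlm_thread1()/gdlm_thread2() entry points, so the thread no longer reads ls->thread1,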
and it fixes the problem. I've done more than 2000 iterations of the
loop that was recreating the failure and it seems to work.
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
--
Diffstat (limited to 'fs')
-rw-r--r-- | fs/gfs2/locking/dlm/thread.c | 20 |
1 files changed, 13 insertions, 7 deletions
diff --git a/fs/gfs2/locking/dlm/thread.c b/fs/gfs2/locking/dlm/thread.c index 1aca51e45092..bd938f06481d 100644 --- a/fs/gfs2/locking/dlm/thread.c +++ b/fs/gfs2/locking/dlm/thread.c | |||
@@ -268,20 +268,16 @@ static inline int check_drop(struct gdlm_ls *ls) | |||
268 | return 0; | 268 | return 0; |
269 | } | 269 | } |
270 | 270 | ||
271 | static int gdlm_thread(void *data) | 271 | static int gdlm_thread(void *data, int blist) |
272 | { | 272 | { |
273 | struct gdlm_ls *ls = (struct gdlm_ls *) data; | 273 | struct gdlm_ls *ls = (struct gdlm_ls *) data; |
274 | struct gdlm_lock *lp = NULL; | 274 | struct gdlm_lock *lp = NULL; |
275 | int blist = 0; | ||
276 | uint8_t complete, blocking, submit, drop; | 275 | uint8_t complete, blocking, submit, drop; |
277 | DECLARE_WAITQUEUE(wait, current); | 276 | DECLARE_WAITQUEUE(wait, current); |
278 | 277 | ||
279 | /* Only thread1 is allowed to do blocking callbacks since gfs | 278 | /* Only thread1 is allowed to do blocking callbacks since gfs |
280 | may wait for a completion callback within a blocking cb. */ | 279 | may wait for a completion callback within a blocking cb. */ |
281 | 280 | ||
282 | if (current == ls->thread1) | ||
283 | blist = 1; | ||
284 | |||
285 | while (!kthread_should_stop()) { | 281 | while (!kthread_should_stop()) { |
286 | set_current_state(TASK_INTERRUPTIBLE); | 282 | set_current_state(TASK_INTERRUPTIBLE); |
287 | add_wait_queue(&ls->thread_wait, &wait); | 283 | add_wait_queue(&ls->thread_wait, &wait); |
@@ -333,12 +329,22 @@ static int gdlm_thread(void *data) | |||
333 | return 0; | 329 | return 0; |
334 | } | 330 | } |
335 | 331 | ||
332 | static int gdlm_thread1(void *data) | ||
333 | { | ||
334 | return gdlm_thread(data, 1); | ||
335 | } | ||
336 | |||
337 | static int gdlm_thread2(void *data) | ||
338 | { | ||
339 | return gdlm_thread(data, 0); | ||
340 | } | ||
341 | |||
336 | int gdlm_init_threads(struct gdlm_ls *ls) | 342 | int gdlm_init_threads(struct gdlm_ls *ls) |
337 | { | 343 | { |
338 | struct task_struct *p; | 344 | struct task_struct *p; |
339 | int error; | 345 | int error; |
340 | 346 | ||
341 | p = kthread_run(gdlm_thread, ls, "lock_dlm1"); | 347 | p = kthread_run(gdlm_thread1, ls, "lock_dlm1"); |
342 | error = IS_ERR(p); | 348 | error = IS_ERR(p); |
343 | if (error) { | 349 | if (error) { |
344 | log_error("can't start lock_dlm1 thread %d", error); | 350 | log_error("can't start lock_dlm1 thread %d", error); |
@@ -346,7 +352,7 @@ int gdlm_init_threads(struct gdlm_ls *ls) | |||
346 | } | 352 | } |
347 | ls->thread1 = p; | 353 | ls->thread1 = p; |
348 | 354 | ||
349 | p = kthread_run(gdlm_thread, ls, "lock_dlm2"); | 355 | p = kthread_run(gdlm_thread2, ls, "lock_dlm2"); |
350 | error = IS_ERR(p); | 356 | error = IS_ERR(p); |
351 | if (error) { | 357 | if (error) { |
352 | log_error("can't start lock_dlm2 thread %d", error); | 358 | log_error("can't start lock_dlm2 thread %d", error); |