aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBob Peterson <rpeterso@redhat.com>2007-09-14 10:27:59 -0400
committerSteven Whitehouse <swhiteho@redhat.com>2007-10-10 03:56:22 -0400
commit55c0c4ac0be144014651b19e77c9b77f367955de (patch)
treedbea7f24657f8e99f4ff94519cb4f24373930320
parentd66f8277f53407754f50ae6bada68f1b68d04d48 (diff)
[GFS2] GFS2: chmod hung - fix race in thread creation
The problem boiled down to a race between the gdlm_init_threads() function initializing thread1 and its setting of blist = 1. Essentially, "if (current == ls->thread1)" was checked by the thread before the thread creator set ls->thread1. Since thread1 is the only thread who is allowed to work on the blocking queue, and since neither thread thought it was thread1, no one was working on the queue. So everything just sat. This patch reuses the ls->async_lock spin_lock to fix the race, and it fixes the problem. I've done more than 2000 iterations of the loop that was recreating the failure and it seems to work. Signed-off-by: Bob Peterson <rpeterso@redhat.com> Signed-off-by: Steven Whitehouse <swhiteho@redhat.com> --
-rw-r--r--fs/gfs2/locking/dlm/thread.c20
1 files changed, 13 insertions, 7 deletions
diff --git a/fs/gfs2/locking/dlm/thread.c b/fs/gfs2/locking/dlm/thread.c
index 1aca51e45092..bd938f06481d 100644
--- a/fs/gfs2/locking/dlm/thread.c
+++ b/fs/gfs2/locking/dlm/thread.c
@@ -268,20 +268,16 @@ static inline int check_drop(struct gdlm_ls *ls)
268 return 0; 268 return 0;
269} 269}
270 270
271static int gdlm_thread(void *data) 271static int gdlm_thread(void *data, int blist)
272{ 272{
273 struct gdlm_ls *ls = (struct gdlm_ls *) data; 273 struct gdlm_ls *ls = (struct gdlm_ls *) data;
274 struct gdlm_lock *lp = NULL; 274 struct gdlm_lock *lp = NULL;
275 int blist = 0;
276 uint8_t complete, blocking, submit, drop; 275 uint8_t complete, blocking, submit, drop;
277 DECLARE_WAITQUEUE(wait, current); 276 DECLARE_WAITQUEUE(wait, current);
278 277
279 /* Only thread1 is allowed to do blocking callbacks since gfs 278 /* Only thread1 is allowed to do blocking callbacks since gfs
280 may wait for a completion callback within a blocking cb. */ 279 may wait for a completion callback within a blocking cb. */
281 280
282 if (current == ls->thread1)
283 blist = 1;
284
285 while (!kthread_should_stop()) { 281 while (!kthread_should_stop()) {
286 set_current_state(TASK_INTERRUPTIBLE); 282 set_current_state(TASK_INTERRUPTIBLE);
287 add_wait_queue(&ls->thread_wait, &wait); 283 add_wait_queue(&ls->thread_wait, &wait);
@@ -333,12 +329,22 @@ static int gdlm_thread(void *data)
333 return 0; 329 return 0;
334} 330}
335 331
332static int gdlm_thread1(void *data)
333{
334 return gdlm_thread(data, 1);
335}
336
337static int gdlm_thread2(void *data)
338{
339 return gdlm_thread(data, 0);
340}
341
336int gdlm_init_threads(struct gdlm_ls *ls) 342int gdlm_init_threads(struct gdlm_ls *ls)
337{ 343{
338 struct task_struct *p; 344 struct task_struct *p;
339 int error; 345 int error;
340 346
341 p = kthread_run(gdlm_thread, ls, "lock_dlm1"); 347 p = kthread_run(gdlm_thread1, ls, "lock_dlm1");
342 error = IS_ERR(p); 348 error = IS_ERR(p);
343 if (error) { 349 if (error) {
344 log_error("can't start lock_dlm1 thread %d", error); 350 log_error("can't start lock_dlm1 thread %d", error);
@@ -346,7 +352,7 @@ int gdlm_init_threads(struct gdlm_ls *ls)
346 } 352 }
347 ls->thread1 = p; 353 ls->thread1 = p;
348 354
349 p = kthread_run(gdlm_thread, ls, "lock_dlm2"); 355 p = kthread_run(gdlm_thread2, ls, "lock_dlm2");
350 error = IS_ERR(p); 356 error = IS_ERR(p);
351 if (error) { 357 if (error) {
352 log_error("can't start lock_dlm2 thread %d", error); 358 log_error("can't start lock_dlm2 thread %d", error);