about summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorDavid Teigland <teigland@redhat.com>2012-08-02 12:08:21 -0400
committerDavid Teigland <teigland@redhat.com>2012-08-08 12:33:49 -0400
commit475f230c6072fb2186f48b23943afcd0ee3a8343 (patch)
tree42c7979e644138ed93f30f2cd8cf2c33bb849078
parent6ad2291624824c1de19dbbbbb6d4f9f601b60781 (diff)
dlm: fix unlock balance warnings
The in_recovery rw_semaphore has always been acquired and
released by different threads by design.  To work around the
"BUG: bad unlock balance detected!" messages, adjust things
so the dlm_recoverd thread always does both down_write and
up_write.

Signed-off-by: David Teigland <teigland@redhat.com>
-rw-r--r--fs/dlm/dlm_internal.h46
-rw-r--r--fs/dlm/lockspace.c15
-rw-r--r--fs/dlm/member.c17
-rw-r--r--fs/dlm/rcom.c2
-rw-r--r--fs/dlm/recoverd.c27
-rw-r--r--fs/dlm/recoverd.h1
6 files changed, 78 insertions(+), 30 deletions(-)
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 9d3e485f88c8..871c1abf6029 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -604,6 +604,7 @@ struct dlm_ls {
 	struct idr		ls_recover_idr;
 	spinlock_t		ls_recover_idr_lock;
 	wait_queue_head_t	ls_wait_general;
+	wait_queue_head_t	ls_recover_lock_wait;
 	struct mutex		ls_clear_proc_locks;
 
 	struct list_head	ls_root_list;	/* root resources */
@@ -616,15 +617,40 @@ struct dlm_ls {
 	char			ls_name[1];
 };
 
-#define LSFL_WORK		0
-#define LSFL_RUNNING		1
-#define LSFL_RECOVERY_STOP	2
-#define LSFL_RCOM_READY		3
-#define LSFL_RCOM_WAIT		4
-#define LSFL_UEVENT_WAIT	5
-#define LSFL_TIMEWARN		6
-#define LSFL_CB_DELAY		7
-#define LSFL_NODIR		8
+/*
+ * LSFL_RECOVER_STOP - dlm_ls_stop() sets this to tell dlm recovery routines
+ * that they should abort what they're doing so new recovery can be started.
+ *
+ * LSFL_RECOVER_DOWN - dlm_ls_stop() sets this to tell dlm_recoverd that it
+ * should do down_write() on the in_recovery rw_semaphore. (doing down_write
+ * within dlm_ls_stop causes complaints about the lock acquired/released
+ * in different contexts.)
+ *
+ * LSFL_RECOVER_LOCK - dlm_recoverd holds the in_recovery rw_semaphore.
+ * It sets this after it is done with down_write() on the in_recovery
+ * rw_semaphore and clears it after it has released the rw_semaphore.
+ *
+ * LSFL_RECOVER_WORK - dlm_ls_start() sets this to tell dlm_recoverd that it
+ * should begin recovery of the lockspace.
+ *
+ * LSFL_RUNNING - set when normal locking activity is enabled.
+ * dlm_ls_stop() clears this to tell dlm locking routines that they should
+ * quit what they are doing so recovery can run.  dlm_recoverd sets
+ * this after recovery is finished.
+ */
+
+#define LSFL_RECOVER_STOP	0
+#define LSFL_RECOVER_DOWN	1
+#define LSFL_RECOVER_LOCK	2
+#define LSFL_RECOVER_WORK	3
+#define LSFL_RUNNING		4
+
+#define LSFL_RCOM_READY		5
+#define LSFL_RCOM_WAIT		6
+#define LSFL_UEVENT_WAIT	7
+#define LSFL_TIMEWARN		8
+#define LSFL_CB_DELAY		9
+#define LSFL_NODIR		10
 
 /* much of this is just saving user space pointers associated with the
    lock that we pass back to the user lib with an ast */
@@ -667,7 +693,7 @@ static inline int dlm_locking_stopped(struct dlm_ls *ls)
 
 static inline int dlm_recovery_stopped(struct dlm_ls *ls)
 {
-	return test_bit(LSFL_RECOVERY_STOP, &ls->ls_flags);
+	return test_bit(LSFL_RECOVER_STOP, &ls->ls_flags);
 }
 
 static inline int dlm_no_directory(struct dlm_ls *ls)
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 952557d00ccd..2e99fb0c9737 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -582,8 +582,6 @@ static int new_lockspace(const char *name, const char *cluster,
 	INIT_LIST_HEAD(&ls->ls_root_list);
 	init_rwsem(&ls->ls_root_sem);
 
-	down_write(&ls->ls_in_recovery);
-
 	spin_lock(&lslist_lock);
 	ls->ls_create_count = 1;
 	list_add(&ls->ls_list, &lslist);
@@ -597,13 +595,24 @@ static int new_lockspace(const char *name, const char *cluster,
 		}
 	}
 
-	/* needs to find ls in lslist */
+	init_waitqueue_head(&ls->ls_recover_lock_wait);
+
+	/*
+	 * Once started, dlm_recoverd first looks for ls in lslist, then
+	 * initializes ls_in_recovery as locked in "down" mode.  We need
+	 * to wait for the wakeup from dlm_recoverd because in_recovery
+	 * has to start out in down mode.
+	 */
+
 	error = dlm_recoverd_start(ls);
 	if (error) {
 		log_error(ls, "can't start dlm_recoverd %d", error);
 		goto out_callback;
 	}
 
+	wait_event(ls->ls_recover_lock_wait,
+		   test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags));
+
 	ls->ls_kobj.kset = dlm_kset;
 	error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL,
 				     "%s", ls->ls_name);
diff --git a/fs/dlm/member.c b/fs/dlm/member.c
index 862640a36d5c..476557b54921 100644
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@@ -616,13 +616,13 @@ int dlm_ls_stop(struct dlm_ls *ls)
 	down_write(&ls->ls_recv_active);
 
 	/*
-	 * Abort any recovery that's in progress (see RECOVERY_STOP,
+	 * Abort any recovery that's in progress (see RECOVER_STOP,
 	 * dlm_recovery_stopped()) and tell any other threads running in the
 	 * dlm to quit any processing (see RUNNING, dlm_locking_stopped()).
 	 */
 
 	spin_lock(&ls->ls_recover_lock);
-	set_bit(LSFL_RECOVERY_STOP, &ls->ls_flags);
+	set_bit(LSFL_RECOVER_STOP, &ls->ls_flags);
 	new = test_and_clear_bit(LSFL_RUNNING, &ls->ls_flags);
 	ls->ls_recover_seq++;
 	spin_unlock(&ls->ls_recover_lock);
@@ -642,12 +642,16 @@ int dlm_ls_stop(struct dlm_ls *ls)
 	 * when recovery is complete.
 	 */
 
-	if (new)
-		down_write(&ls->ls_in_recovery);
+	if (new) {
+		set_bit(LSFL_RECOVER_DOWN, &ls->ls_flags);
+		wake_up_process(ls->ls_recoverd_task);
+		wait_event(ls->ls_recover_lock_wait,
+			   test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags));
+	}
 
 	/*
 	 * The recoverd suspend/resume makes sure that dlm_recoverd (if
-	 * running) has noticed RECOVERY_STOP above and quit processing the
+	 * running) has noticed RECOVER_STOP above and quit processing the
 	 * previous recovery.
 	 */
 
@@ -709,7 +713,8 @@ int dlm_ls_start(struct dlm_ls *ls)
 		kfree(rv_old);
 	}
 
-	dlm_recoverd_kick(ls);
+	set_bit(LSFL_RECOVER_WORK, &ls->ls_flags);
+	wake_up_process(ls->ls_recoverd_task);
 	return 0;
 
  fail:
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index 87f1a56eab32..9d61947d473a 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -581,7 +581,7 @@ void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
 
 	spin_lock(&ls->ls_recover_lock);
 	status = ls->ls_recover_status;
-	stop = test_bit(LSFL_RECOVERY_STOP, &ls->ls_flags);
+	stop = test_bit(LSFL_RECOVER_STOP, &ls->ls_flags);
 	seq = ls->ls_recover_seq;
 	spin_unlock(&ls->ls_recover_lock);
 
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index 88ce65ff021e..32f9f8926ec3 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -41,6 +41,7 @@ static int enable_locking(struct dlm_ls *ls, uint64_t seq)
 		set_bit(LSFL_RUNNING, &ls->ls_flags);
 		/* unblocks processes waiting to enter the dlm */
 		up_write(&ls->ls_in_recovery);
+		clear_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
 		error = 0;
 	}
 	spin_unlock(&ls->ls_recover_lock);
@@ -262,7 +263,7 @@ static void do_ls_recovery(struct dlm_ls *ls)
 	rv = ls->ls_recover_args;
 	ls->ls_recover_args = NULL;
 	if (rv && ls->ls_recover_seq == rv->seq)
-		clear_bit(LSFL_RECOVERY_STOP, &ls->ls_flags);
+		clear_bit(LSFL_RECOVER_STOP, &ls->ls_flags);
 	spin_unlock(&ls->ls_recover_lock);
 
 	if (rv) {
@@ -282,26 +283,34 @@ static int dlm_recoverd(void *arg)
 		return -1;
 	}
 
+	down_write(&ls->ls_in_recovery);
+	set_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
+	wake_up(&ls->ls_recover_lock_wait);
+
 	while (!kthread_should_stop()) {
 		set_current_state(TASK_INTERRUPTIBLE);
-		if (!test_bit(LSFL_WORK, &ls->ls_flags))
+		if (!test_bit(LSFL_RECOVER_WORK, &ls->ls_flags) &&
+		    !test_bit(LSFL_RECOVER_DOWN, &ls->ls_flags))
 			schedule();
 		set_current_state(TASK_RUNNING);
 
-		if (test_and_clear_bit(LSFL_WORK, &ls->ls_flags))
+		if (test_and_clear_bit(LSFL_RECOVER_DOWN, &ls->ls_flags)) {
+			down_write(&ls->ls_in_recovery);
+			set_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
+			wake_up(&ls->ls_recover_lock_wait);
+		}
+
+		if (test_and_clear_bit(LSFL_RECOVER_WORK, &ls->ls_flags))
 			do_ls_recovery(ls);
 	}
 
+	if (test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags))
+		up_write(&ls->ls_in_recovery);
+
 	dlm_put_lockspace(ls);
 	return 0;
 }
 
-void dlm_recoverd_kick(struct dlm_ls *ls)
-{
-	set_bit(LSFL_WORK, &ls->ls_flags);
-	wake_up_process(ls->ls_recoverd_task);
-}
-
 int dlm_recoverd_start(struct dlm_ls *ls)
 {
 	struct task_struct *p;
diff --git a/fs/dlm/recoverd.h b/fs/dlm/recoverd.h
index 866657c5d69d..8856079733fa 100644
--- a/fs/dlm/recoverd.h
+++ b/fs/dlm/recoverd.h
@@ -14,7 +14,6 @@
 #ifndef __RECOVERD_DOT_H__
 #define __RECOVERD_DOT_H__
 
-void dlm_recoverd_kick(struct dlm_ls *ls);
 void dlm_recoverd_stop(struct dlm_ls *ls);
 int dlm_recoverd_start(struct dlm_ls *ls);
 void dlm_recoverd_suspend(struct dlm_ls *ls);