diff options
author | David Teigland <teigland@redhat.com> | 2012-08-02 12:08:21 -0400 |
---|---|---|
committer | David Teigland <teigland@redhat.com> | 2012-08-08 12:33:49 -0400 |
commit | 475f230c6072fb2186f48b23943afcd0ee3a8343 (patch) | |
tree | 42c7979e644138ed93f30f2cd8cf2c33bb849078 | |
parent | 6ad2291624824c1de19dbbbbb6d4f9f601b60781 (diff) |
dlm: fix unlock balance warnings
The in_recovery rw_semaphore has always been acquired and
released by different threads by design. To work around
the "BUG: bad unlock balance detected!" messages, adjust
things so the dlm_recoverd thread always does both down_write
and up_write.
Signed-off-by: David Teigland <teigland@redhat.com>
-rw-r--r-- | fs/dlm/dlm_internal.h | 46 | ||||
-rw-r--r-- | fs/dlm/lockspace.c | 15 | ||||
-rw-r--r-- | fs/dlm/member.c | 17 | ||||
-rw-r--r-- | fs/dlm/rcom.c | 2 | ||||
-rw-r--r-- | fs/dlm/recoverd.c | 27 | ||||
-rw-r--r-- | fs/dlm/recoverd.h | 1 |
6 files changed, 78 insertions(+), 30 deletions(-)
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index 9d3e485f88c8..871c1abf6029 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h | |||
@@ -604,6 +604,7 @@ struct dlm_ls { | |||
604 | struct idr ls_recover_idr; | 604 | struct idr ls_recover_idr; |
605 | spinlock_t ls_recover_idr_lock; | 605 | spinlock_t ls_recover_idr_lock; |
606 | wait_queue_head_t ls_wait_general; | 606 | wait_queue_head_t ls_wait_general; |
607 | wait_queue_head_t ls_recover_lock_wait; | ||
607 | struct mutex ls_clear_proc_locks; | 608 | struct mutex ls_clear_proc_locks; |
608 | 609 | ||
609 | struct list_head ls_root_list; /* root resources */ | 610 | struct list_head ls_root_list; /* root resources */ |
@@ -616,15 +617,40 @@ struct dlm_ls { | |||
616 | char ls_name[1]; | 617 | char ls_name[1]; |
617 | }; | 618 | }; |
618 | 619 | ||
619 | #define LSFL_WORK 0 | 620 | /* |
620 | #define LSFL_RUNNING 1 | 621 | * LSFL_RECOVER_STOP - dlm_ls_stop() sets this to tell dlm recovery routines |
621 | #define LSFL_RECOVERY_STOP 2 | 622 | * that they should abort what they're doing so new recovery can be started. |
622 | #define LSFL_RCOM_READY 3 | 623 | * |
623 | #define LSFL_RCOM_WAIT 4 | 624 | * LSFL_RECOVER_DOWN - dlm_ls_stop() sets this to tell dlm_recoverd that it |
624 | #define LSFL_UEVENT_WAIT 5 | 625 | * should do down_write() on the in_recovery rw_semaphore. (doing down_write |
625 | #define LSFL_TIMEWARN 6 | 626 | * within dlm_ls_stop causes complaints about the lock acquired/released |
626 | #define LSFL_CB_DELAY 7 | 627 | * in different contexts.) |
627 | #define LSFL_NODIR 8 | 628 | * |
629 | * LSFL_RECOVER_LOCK - dlm_recoverd holds the in_recovery rw_semaphore. | ||
630 | * It sets this after it is done with down_write() on the in_recovery | ||
631 | * rw_semaphore and clears it after it has released the rw_semaphore. | ||
632 | * | ||
633 | * LSFL_RECOVER_WORK - dlm_ls_start() sets this to tell dlm_recoverd that it | ||
634 | * should begin recovery of the lockspace. | ||
635 | * | ||
636 | * LSFL_RUNNING - set when normal locking activity is enabled. | ||
637 | * dlm_ls_stop() clears this to tell dlm locking routines that they should | ||
638 | * quit what they are doing so recovery can run. dlm_recoverd sets | ||
639 | * this after recovery is finished. | ||
640 | */ | ||
641 | |||
642 | #define LSFL_RECOVER_STOP 0 | ||
643 | #define LSFL_RECOVER_DOWN 1 | ||
644 | #define LSFL_RECOVER_LOCK 2 | ||
645 | #define LSFL_RECOVER_WORK 3 | ||
646 | #define LSFL_RUNNING 4 | ||
647 | |||
648 | #define LSFL_RCOM_READY 5 | ||
649 | #define LSFL_RCOM_WAIT 6 | ||
650 | #define LSFL_UEVENT_WAIT 7 | ||
651 | #define LSFL_TIMEWARN 8 | ||
652 | #define LSFL_CB_DELAY 9 | ||
653 | #define LSFL_NODIR 10 | ||
628 | 654 | ||
629 | /* much of this is just saving user space pointers associated with the | 655 | /* much of this is just saving user space pointers associated with the |
630 | lock that we pass back to the user lib with an ast */ | 656 | lock that we pass back to the user lib with an ast */ |
@@ -667,7 +693,7 @@ static inline int dlm_locking_stopped(struct dlm_ls *ls) | |||
667 | 693 | ||
668 | static inline int dlm_recovery_stopped(struct dlm_ls *ls) | 694 | static inline int dlm_recovery_stopped(struct dlm_ls *ls) |
669 | { | 695 | { |
670 | return test_bit(LSFL_RECOVERY_STOP, &ls->ls_flags); | 696 | return test_bit(LSFL_RECOVER_STOP, &ls->ls_flags); |
671 | } | 697 | } |
672 | 698 | ||
673 | static inline int dlm_no_directory(struct dlm_ls *ls) | 699 | static inline int dlm_no_directory(struct dlm_ls *ls) |
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 952557d00ccd..2e99fb0c9737 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c | |||
@@ -582,8 +582,6 @@ static int new_lockspace(const char *name, const char *cluster, | |||
582 | INIT_LIST_HEAD(&ls->ls_root_list); | 582 | INIT_LIST_HEAD(&ls->ls_root_list); |
583 | init_rwsem(&ls->ls_root_sem); | 583 | init_rwsem(&ls->ls_root_sem); |
584 | 584 | ||
585 | down_write(&ls->ls_in_recovery); | ||
586 | |||
587 | spin_lock(&lslist_lock); | 585 | spin_lock(&lslist_lock); |
588 | ls->ls_create_count = 1; | 586 | ls->ls_create_count = 1; |
589 | list_add(&ls->ls_list, &lslist); | 587 | list_add(&ls->ls_list, &lslist); |
@@ -597,13 +595,24 @@ static int new_lockspace(const char *name, const char *cluster, | |||
597 | } | 595 | } |
598 | } | 596 | } |
599 | 597 | ||
600 | /* needs to find ls in lslist */ | 598 | init_waitqueue_head(&ls->ls_recover_lock_wait); |
599 | |||
600 | /* | ||
601 | * Once started, dlm_recoverd first looks for ls in lslist, then | ||
602 | * initializes ls_in_recovery as locked in "down" mode. We need | ||
603 | * to wait for the wakeup from dlm_recoverd because in_recovery | ||
604 | * has to start out in down mode. | ||
605 | */ | ||
606 | |||
601 | error = dlm_recoverd_start(ls); | 607 | error = dlm_recoverd_start(ls); |
602 | if (error) { | 608 | if (error) { |
603 | log_error(ls, "can't start dlm_recoverd %d", error); | 609 | log_error(ls, "can't start dlm_recoverd %d", error); |
604 | goto out_callback; | 610 | goto out_callback; |
605 | } | 611 | } |
606 | 612 | ||
613 | wait_event(ls->ls_recover_lock_wait, | ||
614 | test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags)); | ||
615 | |||
607 | ls->ls_kobj.kset = dlm_kset; | 616 | ls->ls_kobj.kset = dlm_kset; |
608 | error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL, | 617 | error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL, |
609 | "%s", ls->ls_name); | 618 | "%s", ls->ls_name); |
diff --git a/fs/dlm/member.c b/fs/dlm/member.c index 862640a36d5c..476557b54921 100644 --- a/fs/dlm/member.c +++ b/fs/dlm/member.c | |||
@@ -616,13 +616,13 @@ int dlm_ls_stop(struct dlm_ls *ls) | |||
616 | down_write(&ls->ls_recv_active); | 616 | down_write(&ls->ls_recv_active); |
617 | 617 | ||
618 | /* | 618 | /* |
619 | * Abort any recovery that's in progress (see RECOVERY_STOP, | 619 | * Abort any recovery that's in progress (see RECOVER_STOP, |
620 | * dlm_recovery_stopped()) and tell any other threads running in the | 620 | * dlm_recovery_stopped()) and tell any other threads running in the |
621 | * dlm to quit any processing (see RUNNING, dlm_locking_stopped()). | 621 | * dlm to quit any processing (see RUNNING, dlm_locking_stopped()). |
622 | */ | 622 | */ |
623 | 623 | ||
624 | spin_lock(&ls->ls_recover_lock); | 624 | spin_lock(&ls->ls_recover_lock); |
625 | set_bit(LSFL_RECOVERY_STOP, &ls->ls_flags); | 625 | set_bit(LSFL_RECOVER_STOP, &ls->ls_flags); |
626 | new = test_and_clear_bit(LSFL_RUNNING, &ls->ls_flags); | 626 | new = test_and_clear_bit(LSFL_RUNNING, &ls->ls_flags); |
627 | ls->ls_recover_seq++; | 627 | ls->ls_recover_seq++; |
628 | spin_unlock(&ls->ls_recover_lock); | 628 | spin_unlock(&ls->ls_recover_lock); |
@@ -642,12 +642,16 @@ int dlm_ls_stop(struct dlm_ls *ls) | |||
642 | * when recovery is complete. | 642 | * when recovery is complete. |
643 | */ | 643 | */ |
644 | 644 | ||
645 | if (new) | 645 | if (new) { |
646 | down_write(&ls->ls_in_recovery); | 646 | set_bit(LSFL_RECOVER_DOWN, &ls->ls_flags); |
647 | wake_up_process(ls->ls_recoverd_task); | ||
648 | wait_event(ls->ls_recover_lock_wait, | ||
649 | test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags)); | ||
650 | } | ||
647 | 651 | ||
648 | /* | 652 | /* |
649 | * The recoverd suspend/resume makes sure that dlm_recoverd (if | 653 | * The recoverd suspend/resume makes sure that dlm_recoverd (if |
650 | * running) has noticed RECOVERY_STOP above and quit processing the | 654 | * running) has noticed RECOVER_STOP above and quit processing the |
651 | * previous recovery. | 655 | * previous recovery. |
652 | */ | 656 | */ |
653 | 657 | ||
@@ -709,7 +713,8 @@ int dlm_ls_start(struct dlm_ls *ls) | |||
709 | kfree(rv_old); | 713 | kfree(rv_old); |
710 | } | 714 | } |
711 | 715 | ||
712 | dlm_recoverd_kick(ls); | 716 | set_bit(LSFL_RECOVER_WORK, &ls->ls_flags); |
717 | wake_up_process(ls->ls_recoverd_task); | ||
713 | return 0; | 718 | return 0; |
714 | 719 | ||
715 | fail: | 720 | fail: |
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c index 87f1a56eab32..9d61947d473a 100644 --- a/fs/dlm/rcom.c +++ b/fs/dlm/rcom.c | |||
@@ -581,7 +581,7 @@ void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) | |||
581 | 581 | ||
582 | spin_lock(&ls->ls_recover_lock); | 582 | spin_lock(&ls->ls_recover_lock); |
583 | status = ls->ls_recover_status; | 583 | status = ls->ls_recover_status; |
584 | stop = test_bit(LSFL_RECOVERY_STOP, &ls->ls_flags); | 584 | stop = test_bit(LSFL_RECOVER_STOP, &ls->ls_flags); |
585 | seq = ls->ls_recover_seq; | 585 | seq = ls->ls_recover_seq; |
586 | spin_unlock(&ls->ls_recover_lock); | 586 | spin_unlock(&ls->ls_recover_lock); |
587 | 587 | ||
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c index 88ce65ff021e..32f9f8926ec3 100644 --- a/fs/dlm/recoverd.c +++ b/fs/dlm/recoverd.c | |||
@@ -41,6 +41,7 @@ static int enable_locking(struct dlm_ls *ls, uint64_t seq) | |||
41 | set_bit(LSFL_RUNNING, &ls->ls_flags); | 41 | set_bit(LSFL_RUNNING, &ls->ls_flags); |
42 | /* unblocks processes waiting to enter the dlm */ | 42 | /* unblocks processes waiting to enter the dlm */ |
43 | up_write(&ls->ls_in_recovery); | 43 | up_write(&ls->ls_in_recovery); |
44 | clear_bit(LSFL_RECOVER_LOCK, &ls->ls_flags); | ||
44 | error = 0; | 45 | error = 0; |
45 | } | 46 | } |
46 | spin_unlock(&ls->ls_recover_lock); | 47 | spin_unlock(&ls->ls_recover_lock); |
@@ -262,7 +263,7 @@ static void do_ls_recovery(struct dlm_ls *ls) | |||
262 | rv = ls->ls_recover_args; | 263 | rv = ls->ls_recover_args; |
263 | ls->ls_recover_args = NULL; | 264 | ls->ls_recover_args = NULL; |
264 | if (rv && ls->ls_recover_seq == rv->seq) | 265 | if (rv && ls->ls_recover_seq == rv->seq) |
265 | clear_bit(LSFL_RECOVERY_STOP, &ls->ls_flags); | 266 | clear_bit(LSFL_RECOVER_STOP, &ls->ls_flags); |
266 | spin_unlock(&ls->ls_recover_lock); | 267 | spin_unlock(&ls->ls_recover_lock); |
267 | 268 | ||
268 | if (rv) { | 269 | if (rv) { |
@@ -282,26 +283,34 @@ static int dlm_recoverd(void *arg) | |||
282 | return -1; | 283 | return -1; |
283 | } | 284 | } |
284 | 285 | ||
286 | down_write(&ls->ls_in_recovery); | ||
287 | set_bit(LSFL_RECOVER_LOCK, &ls->ls_flags); | ||
288 | wake_up(&ls->ls_recover_lock_wait); | ||
289 | |||
285 | while (!kthread_should_stop()) { | 290 | while (!kthread_should_stop()) { |
286 | set_current_state(TASK_INTERRUPTIBLE); | 291 | set_current_state(TASK_INTERRUPTIBLE); |
287 | if (!test_bit(LSFL_WORK, &ls->ls_flags)) | 292 | if (!test_bit(LSFL_RECOVER_WORK, &ls->ls_flags) && |
293 | !test_bit(LSFL_RECOVER_DOWN, &ls->ls_flags)) | ||
288 | schedule(); | 294 | schedule(); |
289 | set_current_state(TASK_RUNNING); | 295 | set_current_state(TASK_RUNNING); |
290 | 296 | ||
291 | if (test_and_clear_bit(LSFL_WORK, &ls->ls_flags)) | 297 | if (test_and_clear_bit(LSFL_RECOVER_DOWN, &ls->ls_flags)) { |
298 | down_write(&ls->ls_in_recovery); | ||
299 | set_bit(LSFL_RECOVER_LOCK, &ls->ls_flags); | ||
300 | wake_up(&ls->ls_recover_lock_wait); | ||
301 | } | ||
302 | |||
303 | if (test_and_clear_bit(LSFL_RECOVER_WORK, &ls->ls_flags)) | ||
292 | do_ls_recovery(ls); | 304 | do_ls_recovery(ls); |
293 | } | 305 | } |
294 | 306 | ||
307 | if (test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags)) | ||
308 | up_write(&ls->ls_in_recovery); | ||
309 | |||
295 | dlm_put_lockspace(ls); | 310 | dlm_put_lockspace(ls); |
296 | return 0; | 311 | return 0; |
297 | } | 312 | } |
298 | 313 | ||
299 | void dlm_recoverd_kick(struct dlm_ls *ls) | ||
300 | { | ||
301 | set_bit(LSFL_WORK, &ls->ls_flags); | ||
302 | wake_up_process(ls->ls_recoverd_task); | ||
303 | } | ||
304 | |||
305 | int dlm_recoverd_start(struct dlm_ls *ls) | 314 | int dlm_recoverd_start(struct dlm_ls *ls) |
306 | { | 315 | { |
307 | struct task_struct *p; | 316 | struct task_struct *p; |
diff --git a/fs/dlm/recoverd.h b/fs/dlm/recoverd.h index 866657c5d69d..8856079733fa 100644 --- a/fs/dlm/recoverd.h +++ b/fs/dlm/recoverd.h | |||
@@ -14,7 +14,6 @@ | |||
14 | #ifndef __RECOVERD_DOT_H__ | 14 | #ifndef __RECOVERD_DOT_H__ |
15 | #define __RECOVERD_DOT_H__ | 15 | #define __RECOVERD_DOT_H__ |
16 | 16 | ||
17 | void dlm_recoverd_kick(struct dlm_ls *ls); | ||
18 | void dlm_recoverd_stop(struct dlm_ls *ls); | 17 | void dlm_recoverd_stop(struct dlm_ls *ls); |
19 | int dlm_recoverd_start(struct dlm_ls *ls); | 18 | int dlm_recoverd_start(struct dlm_ls *ls); |
20 | void dlm_recoverd_suspend(struct dlm_ls *ls); | 19 | void dlm_recoverd_suspend(struct dlm_ls *ls); |