aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2/dlm
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ocfs2/dlm')
-rw-r--r--fs/ocfs2/dlm/dlmcommon.h56
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c44
-rw-r--r--fs/ocfs2/dlm/dlmlock.c52
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c175
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c164
-rw-r--r--fs/ocfs2/dlm/dlmthread.c16
6 files changed, 237 insertions, 270 deletions
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index d602abb51b61..a5952ceecba5 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -859,8 +859,8 @@ void dlm_complete_recovery_thread(struct dlm_ctxt *dlm);
859void dlm_wait_for_recovery(struct dlm_ctxt *dlm); 859void dlm_wait_for_recovery(struct dlm_ctxt *dlm);
860void dlm_kick_recovery_thread(struct dlm_ctxt *dlm); 860void dlm_kick_recovery_thread(struct dlm_ctxt *dlm);
861int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node); 861int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node);
862int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout); 862void dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout);
863int dlm_wait_for_node_recovery(struct dlm_ctxt *dlm, u8 node, int timeout); 863void dlm_wait_for_node_recovery(struct dlm_ctxt *dlm, u8 node, int timeout);
864 864
865void dlm_put(struct dlm_ctxt *dlm); 865void dlm_put(struct dlm_ctxt *dlm);
866struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm); 866struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm);
@@ -877,9 +877,8 @@ static inline void dlm_lockres_get(struct dlm_lock_resource *res)
877 kref_get(&res->refs); 877 kref_get(&res->refs);
878} 878}
879void dlm_lockres_put(struct dlm_lock_resource *res); 879void dlm_lockres_put(struct dlm_lock_resource *res);
880void __dlm_unhash_lockres(struct dlm_lock_resource *res); 880void __dlm_unhash_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res);
881void __dlm_insert_lockres(struct dlm_ctxt *dlm, 881void __dlm_insert_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res);
882 struct dlm_lock_resource *res);
883struct dlm_lock_resource * __dlm_lookup_lockres_full(struct dlm_ctxt *dlm, 882struct dlm_lock_resource * __dlm_lookup_lockres_full(struct dlm_ctxt *dlm,
884 const char *name, 883 const char *name,
885 unsigned int len, 884 unsigned int len,
@@ -902,46 +901,15 @@ struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm,
902 const char *name, 901 const char *name,
903 unsigned int namelen); 902 unsigned int namelen);
904 903
905#define dlm_lockres_set_refmap_bit(bit,res) \ 904void dlm_lockres_set_refmap_bit(struct dlm_ctxt *dlm,
906 __dlm_lockres_set_refmap_bit(bit,res,__FILE__,__LINE__) 905 struct dlm_lock_resource *res, int bit);
907#define dlm_lockres_clear_refmap_bit(bit,res) \ 906void dlm_lockres_clear_refmap_bit(struct dlm_ctxt *dlm,
908 __dlm_lockres_clear_refmap_bit(bit,res,__FILE__,__LINE__) 907 struct dlm_lock_resource *res, int bit);
909 908
910static inline void __dlm_lockres_set_refmap_bit(int bit, 909void dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm,
911 struct dlm_lock_resource *res, 910 struct dlm_lock_resource *res);
912 const char *file, 911void dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm,
913 int line) 912 struct dlm_lock_resource *res);
914{
915 //printk("%s:%d:%.*s: setting bit %d\n", file, line,
916 // res->lockname.len, res->lockname.name, bit);
917 set_bit(bit, res->refmap);
918}
919
920static inline void __dlm_lockres_clear_refmap_bit(int bit,
921 struct dlm_lock_resource *res,
922 const char *file,
923 int line)
924{
925 //printk("%s:%d:%.*s: clearing bit %d\n", file, line,
926 // res->lockname.len, res->lockname.name, bit);
927 clear_bit(bit, res->refmap);
928}
929
930void __dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm,
931 struct dlm_lock_resource *res,
932 const char *file,
933 int line);
934void __dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm,
935 struct dlm_lock_resource *res,
936 int new_lockres,
937 const char *file,
938 int line);
939#define dlm_lockres_drop_inflight_ref(d,r) \
940 __dlm_lockres_drop_inflight_ref(d,r,__FILE__,__LINE__)
941#define dlm_lockres_grab_inflight_ref(d,r) \
942 __dlm_lockres_grab_inflight_ref(d,r,0,__FILE__,__LINE__)
943#define dlm_lockres_grab_inflight_ref_new(d,r) \
944 __dlm_lockres_grab_inflight_ref(d,r,1,__FILE__,__LINE__)
945 913
946void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock); 914void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
947void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock); 915void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 6ed6b95dcf93..92f2ead0fab6 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -157,16 +157,18 @@ static int dlm_protocol_compare(struct dlm_protocol_version *existing,
157 157
158static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm); 158static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm);
159 159
160void __dlm_unhash_lockres(struct dlm_lock_resource *lockres) 160void __dlm_unhash_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
161{ 161{
162 if (!hlist_unhashed(&lockres->hash_node)) { 162 if (hlist_unhashed(&res->hash_node))
163 hlist_del_init(&lockres->hash_node); 163 return;
164 dlm_lockres_put(lockres); 164
165 } 165 mlog(0, "%s: Unhash res %.*s\n", dlm->name, res->lockname.len,
166 res->lockname.name);
167 hlist_del_init(&res->hash_node);
168 dlm_lockres_put(res);
166} 169}
167 170
168void __dlm_insert_lockres(struct dlm_ctxt *dlm, 171void __dlm_insert_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
169 struct dlm_lock_resource *res)
170{ 172{
171 struct hlist_head *bucket; 173 struct hlist_head *bucket;
172 struct qstr *q; 174 struct qstr *q;
@@ -180,6 +182,9 @@ void __dlm_insert_lockres(struct dlm_ctxt *dlm,
180 dlm_lockres_get(res); 182 dlm_lockres_get(res);
181 183
182 hlist_add_head(&res->hash_node, bucket); 184 hlist_add_head(&res->hash_node, bucket);
185
186 mlog(0, "%s: Hash res %.*s\n", dlm->name, res->lockname.len,
187 res->lockname.name);
183} 188}
184 189
185struct dlm_lock_resource * __dlm_lookup_lockres_full(struct dlm_ctxt *dlm, 190struct dlm_lock_resource * __dlm_lookup_lockres_full(struct dlm_ctxt *dlm,
@@ -539,17 +544,17 @@ again:
539 544
540static void __dlm_print_nodes(struct dlm_ctxt *dlm) 545static void __dlm_print_nodes(struct dlm_ctxt *dlm)
541{ 546{
542 int node = -1; 547 int node = -1, num = 0;
543 548
544 assert_spin_locked(&dlm->spinlock); 549 assert_spin_locked(&dlm->spinlock);
545 550
546 printk(KERN_NOTICE "o2dlm: Nodes in domain %s: ", dlm->name); 551 printk("( ");
547
548 while ((node = find_next_bit(dlm->domain_map, O2NM_MAX_NODES, 552 while ((node = find_next_bit(dlm->domain_map, O2NM_MAX_NODES,
549 node + 1)) < O2NM_MAX_NODES) { 553 node + 1)) < O2NM_MAX_NODES) {
550 printk("%d ", node); 554 printk("%d ", node);
555 ++num;
551 } 556 }
552 printk("\n"); 557 printk(") %u nodes\n", num);
553} 558}
554 559
555static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, 560static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data,
@@ -566,11 +571,10 @@ static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data,
566 571
567 node = exit_msg->node_idx; 572 node = exit_msg->node_idx;
568 573
569 printk(KERN_NOTICE "o2dlm: Node %u leaves domain %s\n", node, dlm->name);
570
571 spin_lock(&dlm->spinlock); 574 spin_lock(&dlm->spinlock);
572 clear_bit(node, dlm->domain_map); 575 clear_bit(node, dlm->domain_map);
573 clear_bit(node, dlm->exit_domain_map); 576 clear_bit(node, dlm->exit_domain_map);
577 printk(KERN_NOTICE "o2dlm: Node %u leaves domain %s ", node, dlm->name);
574 __dlm_print_nodes(dlm); 578 __dlm_print_nodes(dlm);
575 579
576 /* notify anything attached to the heartbeat events */ 580 /* notify anything attached to the heartbeat events */
@@ -755,6 +759,7 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm)
755 759
756 dlm_mark_domain_leaving(dlm); 760 dlm_mark_domain_leaving(dlm);
757 dlm_leave_domain(dlm); 761 dlm_leave_domain(dlm);
762 printk(KERN_NOTICE "o2dlm: Leaving domain %s\n", dlm->name);
758 dlm_force_free_mles(dlm); 763 dlm_force_free_mles(dlm);
759 dlm_complete_dlm_shutdown(dlm); 764 dlm_complete_dlm_shutdown(dlm);
760 } 765 }
@@ -970,7 +975,7 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data,
970 clear_bit(assert->node_idx, dlm->exit_domain_map); 975 clear_bit(assert->node_idx, dlm->exit_domain_map);
971 __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); 976 __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN);
972 977
973 printk(KERN_NOTICE "o2dlm: Node %u joins domain %s\n", 978 printk(KERN_NOTICE "o2dlm: Node %u joins domain %s ",
974 assert->node_idx, dlm->name); 979 assert->node_idx, dlm->name);
975 __dlm_print_nodes(dlm); 980 __dlm_print_nodes(dlm);
976 981
@@ -1701,8 +1706,10 @@ static int dlm_try_to_join_domain(struct dlm_ctxt *dlm)
1701bail: 1706bail:
1702 spin_lock(&dlm->spinlock); 1707 spin_lock(&dlm->spinlock);
1703 __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); 1708 __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN);
1704 if (!status) 1709 if (!status) {
1710 printk(KERN_NOTICE "o2dlm: Joining domain %s ", dlm->name);
1705 __dlm_print_nodes(dlm); 1711 __dlm_print_nodes(dlm);
1712 }
1706 spin_unlock(&dlm->spinlock); 1713 spin_unlock(&dlm->spinlock);
1707 1714
1708 if (ctxt) { 1715 if (ctxt) {
@@ -2131,13 +2138,6 @@ struct dlm_ctxt * dlm_register_domain(const char *domain,
2131 goto leave; 2138 goto leave;
2132 } 2139 }
2133 2140
2134 if (!o2hb_check_local_node_heartbeating()) {
2135 mlog(ML_ERROR, "the local node has not been configured, or is "
2136 "not heartbeating\n");
2137 ret = -EPROTO;
2138 goto leave;
2139 }
2140
2141 mlog(0, "register called for domain \"%s\"\n", domain); 2141 mlog(0, "register called for domain \"%s\"\n", domain);
2142 2142
2143retry: 2143retry:
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 8d39e0fd66f7..f32fcba04923 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -183,10 +183,6 @@ static enum dlm_status dlmlock_master(struct dlm_ctxt *dlm,
183 kick_thread = 1; 183 kick_thread = 1;
184 } 184 }
185 } 185 }
186 /* reduce the inflight count, this may result in the lockres
187 * being purged below during calc_usage */
188 if (lock->ml.node == dlm->node_num)
189 dlm_lockres_drop_inflight_ref(dlm, res);
190 186
191 spin_unlock(&res->spinlock); 187 spin_unlock(&res->spinlock);
192 wake_up(&res->wq); 188 wake_up(&res->wq);
@@ -231,10 +227,16 @@ static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm,
231 lock->ml.type, res->lockname.len, 227 lock->ml.type, res->lockname.len,
232 res->lockname.name, flags); 228 res->lockname.name, flags);
233 229
230 /*
231 * Wait if resource is getting recovered, remastered, etc.
232 * If the resource was remastered and new owner is self, then exit.
233 */
234 spin_lock(&res->spinlock); 234 spin_lock(&res->spinlock);
235
236 /* will exit this call with spinlock held */
237 __dlm_wait_on_lockres(res); 235 __dlm_wait_on_lockres(res);
236 if (res->owner == dlm->node_num) {
237 spin_unlock(&res->spinlock);
238 return DLM_RECOVERING;
239 }
238 res->state |= DLM_LOCK_RES_IN_PROGRESS; 240 res->state |= DLM_LOCK_RES_IN_PROGRESS;
239 241
240 /* add lock to local (secondary) queue */ 242 /* add lock to local (secondary) queue */
@@ -319,27 +321,23 @@ static enum dlm_status dlm_send_remote_lock_request(struct dlm_ctxt *dlm,
319 tmpret = o2net_send_message(DLM_CREATE_LOCK_MSG, dlm->key, &create, 321 tmpret = o2net_send_message(DLM_CREATE_LOCK_MSG, dlm->key, &create,
320 sizeof(create), res->owner, &status); 322 sizeof(create), res->owner, &status);
321 if (tmpret >= 0) { 323 if (tmpret >= 0) {
322 // successfully sent and received 324 ret = status;
323 ret = status; // this is already a dlm_status
324 if (ret == DLM_REJECTED) { 325 if (ret == DLM_REJECTED) {
325 mlog(ML_ERROR, "%s:%.*s: BUG. this is a stale lockres " 326 mlog(ML_ERROR, "%s: res %.*s, Stale lockres no longer "
326 "no longer owned by %u. that node is coming back " 327 "owned by node %u. That node is coming back up "
327 "up currently.\n", dlm->name, create.namelen, 328 "currently.\n", dlm->name, create.namelen,
328 create.name, res->owner); 329 create.name, res->owner);
329 dlm_print_one_lock_resource(res); 330 dlm_print_one_lock_resource(res);
330 BUG(); 331 BUG();
331 } 332 }
332 } else { 333 } else {
333 mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to " 334 mlog(ML_ERROR, "%s: res %.*s, Error %d send CREATE LOCK to "
334 "node %u\n", tmpret, DLM_CREATE_LOCK_MSG, dlm->key, 335 "node %u\n", dlm->name, create.namelen, create.name,
335 res->owner); 336 tmpret, res->owner);
336 if (dlm_is_host_down(tmpret)) { 337 if (dlm_is_host_down(tmpret))
337 ret = DLM_RECOVERING; 338 ret = DLM_RECOVERING;
338 mlog(0, "node %u died so returning DLM_RECOVERING " 339 else
339 "from lock message!\n", res->owner);
340 } else {
341 ret = dlm_err_to_dlm_status(tmpret); 340 ret = dlm_err_to_dlm_status(tmpret);
342 }
343 } 341 }
344 342
345 return ret; 343 return ret;
@@ -718,18 +716,10 @@ retry_lock:
718 716
719 if (status == DLM_RECOVERING || status == DLM_MIGRATING || 717 if (status == DLM_RECOVERING || status == DLM_MIGRATING ||
720 status == DLM_FORWARD) { 718 status == DLM_FORWARD) {
721 mlog(0, "retrying lock with migration/"
722 "recovery/in progress\n");
723 msleep(100); 719 msleep(100);
724 /* no waiting for dlm_reco_thread */
725 if (recovery) { 720 if (recovery) {
726 if (status != DLM_RECOVERING) 721 if (status != DLM_RECOVERING)
727 goto retry_lock; 722 goto retry_lock;
728
729 mlog(0, "%s: got RECOVERING "
730 "for $RECOVERY lock, master "
731 "was %u\n", dlm->name,
732 res->owner);
733 /* wait to see the node go down, then 723 /* wait to see the node go down, then
734 * drop down and allow the lockres to 724 * drop down and allow the lockres to
735 * get cleaned up. need to remaster. */ 725 * get cleaned up. need to remaster. */
@@ -741,6 +731,14 @@ retry_lock:
741 } 731 }
742 } 732 }
743 733
734 /* Inflight taken in dlm_get_lock_resource() is dropped here */
735 spin_lock(&res->spinlock);
736 dlm_lockres_drop_inflight_ref(dlm, res);
737 spin_unlock(&res->spinlock);
738
739 dlm_lockres_calc_usage(dlm, res);
740 dlm_kick_thread(dlm, res);
741
744 if (status != DLM_NORMAL) { 742 if (status != DLM_NORMAL) {
745 lock->lksb->flags &= ~DLM_LKSB_GET_LVB; 743 lock->lksb->flags &= ~DLM_LKSB_GET_LVB;
746 if (status != DLM_NOTQUEUED) 744 if (status != DLM_NOTQUEUED)
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 11eefb8c12e9..005261c333b0 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -631,39 +631,54 @@ error:
631 return NULL; 631 return NULL;
632} 632}
633 633
634void __dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm, 634void dlm_lockres_set_refmap_bit(struct dlm_ctxt *dlm,
635 struct dlm_lock_resource *res, 635 struct dlm_lock_resource *res, int bit)
636 int new_lockres,
637 const char *file,
638 int line)
639{ 636{
640 if (!new_lockres) 637 assert_spin_locked(&res->spinlock);
641 assert_spin_locked(&res->spinlock); 638
639 mlog(0, "res %.*s, set node %u, %ps()\n", res->lockname.len,
640 res->lockname.name, bit, __builtin_return_address(0));
641
642 set_bit(bit, res->refmap);
643}
644
645void dlm_lockres_clear_refmap_bit(struct dlm_ctxt *dlm,
646 struct dlm_lock_resource *res, int bit)
647{
648 assert_spin_locked(&res->spinlock);
649
650 mlog(0, "res %.*s, clr node %u, %ps()\n", res->lockname.len,
651 res->lockname.name, bit, __builtin_return_address(0));
652
653 clear_bit(bit, res->refmap);
654}
655
656
657void dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm,
658 struct dlm_lock_resource *res)
659{
660 assert_spin_locked(&res->spinlock);
642 661
643 if (!test_bit(dlm->node_num, res->refmap)) {
644 BUG_ON(res->inflight_locks != 0);
645 dlm_lockres_set_refmap_bit(dlm->node_num, res);
646 }
647 res->inflight_locks++; 662 res->inflight_locks++;
648 mlog(0, "%s:%.*s: inflight++: now %u\n", 663
649 dlm->name, res->lockname.len, res->lockname.name, 664 mlog(0, "%s: res %.*s, inflight++: now %u, %ps()\n", dlm->name,
650 res->inflight_locks); 665 res->lockname.len, res->lockname.name, res->inflight_locks,
666 __builtin_return_address(0));
651} 667}
652 668
653void __dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm, 669void dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm,
654 struct dlm_lock_resource *res, 670 struct dlm_lock_resource *res)
655 const char *file,
656 int line)
657{ 671{
658 assert_spin_locked(&res->spinlock); 672 assert_spin_locked(&res->spinlock);
659 673
660 BUG_ON(res->inflight_locks == 0); 674 BUG_ON(res->inflight_locks == 0);
675
661 res->inflight_locks--; 676 res->inflight_locks--;
662 mlog(0, "%s:%.*s: inflight--: now %u\n", 677
663 dlm->name, res->lockname.len, res->lockname.name, 678 mlog(0, "%s: res %.*s, inflight--: now %u, %ps()\n", dlm->name,
664 res->inflight_locks); 679 res->lockname.len, res->lockname.name, res->inflight_locks,
665 if (res->inflight_locks == 0) 680 __builtin_return_address(0));
666 dlm_lockres_clear_refmap_bit(dlm->node_num, res); 681
667 wake_up(&res->wq); 682 wake_up(&res->wq);
668} 683}
669 684
@@ -697,7 +712,6 @@ struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
697 unsigned int hash; 712 unsigned int hash;
698 int tries = 0; 713 int tries = 0;
699 int bit, wait_on_recovery = 0; 714 int bit, wait_on_recovery = 0;
700 int drop_inflight_if_nonlocal = 0;
701 715
702 BUG_ON(!lockid); 716 BUG_ON(!lockid);
703 717
@@ -709,36 +723,33 @@ lookup:
709 spin_lock(&dlm->spinlock); 723 spin_lock(&dlm->spinlock);
710 tmpres = __dlm_lookup_lockres_full(dlm, lockid, namelen, hash); 724 tmpres = __dlm_lookup_lockres_full(dlm, lockid, namelen, hash);
711 if (tmpres) { 725 if (tmpres) {
712 int dropping_ref = 0;
713
714 spin_unlock(&dlm->spinlock); 726 spin_unlock(&dlm->spinlock);
715
716 spin_lock(&tmpres->spinlock); 727 spin_lock(&tmpres->spinlock);
717 /* We wait for the other thread that is mastering the resource */ 728 /* Wait on the thread that is mastering the resource */
718 if (tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN) { 729 if (tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN) {
719 __dlm_wait_on_lockres(tmpres); 730 __dlm_wait_on_lockres(tmpres);
720 BUG_ON(tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN); 731 BUG_ON(tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN);
732 spin_unlock(&tmpres->spinlock);
733 dlm_lockres_put(tmpres);
734 tmpres = NULL;
735 goto lookup;
721 } 736 }
722 737
723 if (tmpres->owner == dlm->node_num) { 738 /* Wait on the resource purge to complete before continuing */
724 BUG_ON(tmpres->state & DLM_LOCK_RES_DROPPING_REF); 739 if (tmpres->state & DLM_LOCK_RES_DROPPING_REF) {
725 dlm_lockres_grab_inflight_ref(dlm, tmpres); 740 BUG_ON(tmpres->owner == dlm->node_num);
726 } else if (tmpres->state & DLM_LOCK_RES_DROPPING_REF) 741 __dlm_wait_on_lockres_flags(tmpres,
727 dropping_ref = 1; 742 DLM_LOCK_RES_DROPPING_REF);
728 spin_unlock(&tmpres->spinlock);
729
730 /* wait until done messaging the master, drop our ref to allow
731 * the lockres to be purged, start over. */
732 if (dropping_ref) {
733 spin_lock(&tmpres->spinlock);
734 __dlm_wait_on_lockres_flags(tmpres, DLM_LOCK_RES_DROPPING_REF);
735 spin_unlock(&tmpres->spinlock); 743 spin_unlock(&tmpres->spinlock);
736 dlm_lockres_put(tmpres); 744 dlm_lockres_put(tmpres);
737 tmpres = NULL; 745 tmpres = NULL;
738 goto lookup; 746 goto lookup;
739 } 747 }
740 748
741 mlog(0, "found in hash!\n"); 749 /* Grab inflight ref to pin the resource */
750 dlm_lockres_grab_inflight_ref(dlm, tmpres);
751
752 spin_unlock(&tmpres->spinlock);
742 if (res) 753 if (res)
743 dlm_lockres_put(res); 754 dlm_lockres_put(res);
744 res = tmpres; 755 res = tmpres;
@@ -829,8 +840,8 @@ lookup:
829 * but they might own this lockres. wait on them. */ 840 * but they might own this lockres. wait on them. */
830 bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0); 841 bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0);
831 if (bit < O2NM_MAX_NODES) { 842 if (bit < O2NM_MAX_NODES) {
832 mlog(ML_NOTICE, "%s:%.*s: at least one node (%d) to " 843 mlog(0, "%s: res %.*s, At least one node (%d) "
833 "recover before lock mastery can begin\n", 844 "to recover before lock mastery can begin\n",
834 dlm->name, namelen, (char *)lockid, bit); 845 dlm->name, namelen, (char *)lockid, bit);
835 wait_on_recovery = 1; 846 wait_on_recovery = 1;
836 } 847 }
@@ -843,12 +854,11 @@ lookup:
843 854
844 /* finally add the lockres to its hash bucket */ 855 /* finally add the lockres to its hash bucket */
845 __dlm_insert_lockres(dlm, res); 856 __dlm_insert_lockres(dlm, res);
846 /* since this lockres is new it doesn't not require the spinlock */
847 dlm_lockres_grab_inflight_ref_new(dlm, res);
848 857
849 /* if this node does not become the master make sure to drop 858 /* Grab inflight ref to pin the resource */
850 * this inflight reference below */ 859 spin_lock(&res->spinlock);
851 drop_inflight_if_nonlocal = 1; 860 dlm_lockres_grab_inflight_ref(dlm, res);
861 spin_unlock(&res->spinlock);
852 862
853 /* get an extra ref on the mle in case this is a BLOCK 863 /* get an extra ref on the mle in case this is a BLOCK
854 * if so, the creator of the BLOCK may try to put the last 864 * if so, the creator of the BLOCK may try to put the last
@@ -864,8 +874,8 @@ redo_request:
864 * dlm spinlock would be detectable be a change on the mle, 874 * dlm spinlock would be detectable be a change on the mle,
865 * so we only need to clear out the recovery map once. */ 875 * so we only need to clear out the recovery map once. */
866 if (dlm_is_recovery_lock(lockid, namelen)) { 876 if (dlm_is_recovery_lock(lockid, namelen)) {
867 mlog(ML_NOTICE, "%s: recovery map is not empty, but " 877 mlog(0, "%s: Recovery map is not empty, but must "
868 "must master $RECOVERY lock now\n", dlm->name); 878 "master $RECOVERY lock now\n", dlm->name);
869 if (!dlm_pre_master_reco_lockres(dlm, res)) 879 if (!dlm_pre_master_reco_lockres(dlm, res))
870 wait_on_recovery = 0; 880 wait_on_recovery = 0;
871 else { 881 else {
@@ -883,8 +893,8 @@ redo_request:
883 spin_lock(&dlm->spinlock); 893 spin_lock(&dlm->spinlock);
884 bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0); 894 bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0);
885 if (bit < O2NM_MAX_NODES) { 895 if (bit < O2NM_MAX_NODES) {
886 mlog(ML_NOTICE, "%s:%.*s: at least one node (%d) to " 896 mlog(0, "%s: res %.*s, At least one node (%d) "
887 "recover before lock mastery can begin\n", 897 "to recover before lock mastery can begin\n",
888 dlm->name, namelen, (char *)lockid, bit); 898 dlm->name, namelen, (char *)lockid, bit);
889 wait_on_recovery = 1; 899 wait_on_recovery = 1;
890 } else 900 } else
@@ -913,8 +923,8 @@ redo_request:
913 * yet, keep going until it does. this is how the 923 * yet, keep going until it does. this is how the
914 * master will know that asserts are needed back to 924 * master will know that asserts are needed back to
915 * the lower nodes. */ 925 * the lower nodes. */
916 mlog(0, "%s:%.*s: requests only up to %u but master " 926 mlog(0, "%s: res %.*s, Requests only up to %u but "
917 "is %u, keep going\n", dlm->name, namelen, 927 "master is %u, keep going\n", dlm->name, namelen,
918 lockid, nodenum, mle->master); 928 lockid, nodenum, mle->master);
919 } 929 }
920 } 930 }
@@ -924,13 +934,12 @@ wait:
924 ret = dlm_wait_for_lock_mastery(dlm, res, mle, &blocked); 934 ret = dlm_wait_for_lock_mastery(dlm, res, mle, &blocked);
925 if (ret < 0) { 935 if (ret < 0) {
926 wait_on_recovery = 1; 936 wait_on_recovery = 1;
927 mlog(0, "%s:%.*s: node map changed, redo the " 937 mlog(0, "%s: res %.*s, Node map changed, redo the master "
928 "master request now, blocked=%d\n", 938 "request now, blocked=%d\n", dlm->name, res->lockname.len,
929 dlm->name, res->lockname.len,
930 res->lockname.name, blocked); 939 res->lockname.name, blocked);
931 if (++tries > 20) { 940 if (++tries > 20) {
932 mlog(ML_ERROR, "%s:%.*s: spinning on " 941 mlog(ML_ERROR, "%s: res %.*s, Spinning on "
933 "dlm_wait_for_lock_mastery, blocked=%d\n", 942 "dlm_wait_for_lock_mastery, blocked = %d\n",
934 dlm->name, res->lockname.len, 943 dlm->name, res->lockname.len,
935 res->lockname.name, blocked); 944 res->lockname.name, blocked);
936 dlm_print_one_lock_resource(res); 945 dlm_print_one_lock_resource(res);
@@ -940,7 +949,8 @@ wait:
940 goto redo_request; 949 goto redo_request;
941 } 950 }
942 951
943 mlog(0, "lockres mastered by %u\n", res->owner); 952 mlog(0, "%s: res %.*s, Mastered by %u\n", dlm->name, res->lockname.len,
953 res->lockname.name, res->owner);
944 /* make sure we never continue without this */ 954 /* make sure we never continue without this */
945 BUG_ON(res->owner == O2NM_MAX_NODES); 955 BUG_ON(res->owner == O2NM_MAX_NODES);
946 956
@@ -952,8 +962,6 @@ wait:
952 962
953wake_waiters: 963wake_waiters:
954 spin_lock(&res->spinlock); 964 spin_lock(&res->spinlock);
955 if (res->owner != dlm->node_num && drop_inflight_if_nonlocal)
956 dlm_lockres_drop_inflight_ref(dlm, res);
957 res->state &= ~DLM_LOCK_RES_IN_PROGRESS; 965 res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
958 spin_unlock(&res->spinlock); 966 spin_unlock(&res->spinlock);
959 wake_up(&res->wq); 967 wake_up(&res->wq);
@@ -1426,9 +1434,7 @@ way_up_top:
1426 } 1434 }
1427 1435
1428 if (res->owner == dlm->node_num) { 1436 if (res->owner == dlm->node_num) {
1429 mlog(0, "%s:%.*s: setting bit %u in refmap\n", 1437 dlm_lockres_set_refmap_bit(dlm, res, request->node_idx);
1430 dlm->name, namelen, name, request->node_idx);
1431 dlm_lockres_set_refmap_bit(request->node_idx, res);
1432 spin_unlock(&res->spinlock); 1438 spin_unlock(&res->spinlock);
1433 response = DLM_MASTER_RESP_YES; 1439 response = DLM_MASTER_RESP_YES;
1434 if (mle) 1440 if (mle)
@@ -1493,10 +1499,8 @@ way_up_top:
1493 * go back and clean the mles on any 1499 * go back and clean the mles on any
1494 * other nodes */ 1500 * other nodes */
1495 dispatch_assert = 1; 1501 dispatch_assert = 1;
1496 dlm_lockres_set_refmap_bit(request->node_idx, res); 1502 dlm_lockres_set_refmap_bit(dlm, res,
1497 mlog(0, "%s:%.*s: setting bit %u in refmap\n", 1503 request->node_idx);
1498 dlm->name, namelen, name,
1499 request->node_idx);
1500 } else 1504 } else
1501 response = DLM_MASTER_RESP_NO; 1505 response = DLM_MASTER_RESP_NO;
1502 } else { 1506 } else {
@@ -1702,7 +1706,7 @@ again:
1702 "lockres, set the bit in the refmap\n", 1706 "lockres, set the bit in the refmap\n",
1703 namelen, lockname, to); 1707 namelen, lockname, to);
1704 spin_lock(&res->spinlock); 1708 spin_lock(&res->spinlock);
1705 dlm_lockres_set_refmap_bit(to, res); 1709 dlm_lockres_set_refmap_bit(dlm, res, to);
1706 spin_unlock(&res->spinlock); 1710 spin_unlock(&res->spinlock);
1707 } 1711 }
1708 } 1712 }
@@ -2187,8 +2191,6 @@ int dlm_drop_lockres_ref(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
2187 namelen = res->lockname.len; 2191 namelen = res->lockname.len;
2188 BUG_ON(namelen > O2NM_MAX_NAME_LEN); 2192 BUG_ON(namelen > O2NM_MAX_NAME_LEN);
2189 2193
2190 mlog(0, "%s:%.*s: sending deref to %d\n",
2191 dlm->name, namelen, lockname, res->owner);
2192 memset(&deref, 0, sizeof(deref)); 2194 memset(&deref, 0, sizeof(deref));
2193 deref.node_idx = dlm->node_num; 2195 deref.node_idx = dlm->node_num;
2194 deref.namelen = namelen; 2196 deref.namelen = namelen;
@@ -2197,14 +2199,12 @@ int dlm_drop_lockres_ref(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
2197 ret = o2net_send_message(DLM_DEREF_LOCKRES_MSG, dlm->key, 2199 ret = o2net_send_message(DLM_DEREF_LOCKRES_MSG, dlm->key,
2198 &deref, sizeof(deref), res->owner, &r); 2200 &deref, sizeof(deref), res->owner, &r);
2199 if (ret < 0) 2201 if (ret < 0)
2200 mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to " 2202 mlog(ML_ERROR, "%s: res %.*s, error %d send DEREF to node %u\n",
2201 "node %u\n", ret, DLM_DEREF_LOCKRES_MSG, dlm->key, 2203 dlm->name, namelen, lockname, ret, res->owner);
2202 res->owner);
2203 else if (r < 0) { 2204 else if (r < 0) {
2204 /* BAD. other node says I did not have a ref. */ 2205 /* BAD. other node says I did not have a ref. */
2205 mlog(ML_ERROR,"while dropping ref on %s:%.*s " 2206 mlog(ML_ERROR, "%s: res %.*s, DEREF to node %u got %d\n",
2206 "(master=%u) got %d.\n", dlm->name, namelen, 2207 dlm->name, namelen, lockname, res->owner, r);
2207 lockname, res->owner, r);
2208 dlm_print_one_lock_resource(res); 2208 dlm_print_one_lock_resource(res);
2209 BUG(); 2209 BUG();
2210 } 2210 }
@@ -2260,7 +2260,7 @@ int dlm_deref_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
2260 else { 2260 else {
2261 BUG_ON(res->state & DLM_LOCK_RES_DROPPING_REF); 2261 BUG_ON(res->state & DLM_LOCK_RES_DROPPING_REF);
2262 if (test_bit(node, res->refmap)) { 2262 if (test_bit(node, res->refmap)) {
2263 dlm_lockres_clear_refmap_bit(node, res); 2263 dlm_lockres_clear_refmap_bit(dlm, res, node);
2264 cleared = 1; 2264 cleared = 1;
2265 } 2265 }
2266 } 2266 }
@@ -2320,7 +2320,7 @@ static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data)
2320 BUG_ON(res->state & DLM_LOCK_RES_DROPPING_REF); 2320 BUG_ON(res->state & DLM_LOCK_RES_DROPPING_REF);
2321 if (test_bit(node, res->refmap)) { 2321 if (test_bit(node, res->refmap)) {
2322 __dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_SETREF_INPROG); 2322 __dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_SETREF_INPROG);
2323 dlm_lockres_clear_refmap_bit(node, res); 2323 dlm_lockres_clear_refmap_bit(dlm, res, node);
2324 cleared = 1; 2324 cleared = 1;
2325 } 2325 }
2326 spin_unlock(&res->spinlock); 2326 spin_unlock(&res->spinlock);
@@ -2802,7 +2802,8 @@ static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm,
2802 BUG_ON(!list_empty(&lock->bast_list)); 2802 BUG_ON(!list_empty(&lock->bast_list));
2803 BUG_ON(lock->ast_pending); 2803 BUG_ON(lock->ast_pending);
2804 BUG_ON(lock->bast_pending); 2804 BUG_ON(lock->bast_pending);
2805 dlm_lockres_clear_refmap_bit(lock->ml.node, res); 2805 dlm_lockres_clear_refmap_bit(dlm, res,
2806 lock->ml.node);
2806 list_del_init(&lock->list); 2807 list_del_init(&lock->list);
2807 dlm_lock_put(lock); 2808 dlm_lock_put(lock);
2808 /* In a normal unlock, we would have added a 2809 /* In a normal unlock, we would have added a
@@ -2823,7 +2824,7 @@ static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm,
2823 mlog(0, "%s:%.*s: node %u had a ref to this " 2824 mlog(0, "%s:%.*s: node %u had a ref to this "
2824 "migrating lockres, clearing\n", dlm->name, 2825 "migrating lockres, clearing\n", dlm->name,
2825 res->lockname.len, res->lockname.name, bit); 2826 res->lockname.len, res->lockname.name, bit);
2826 dlm_lockres_clear_refmap_bit(bit, res); 2827 dlm_lockres_clear_refmap_bit(dlm, res, bit);
2827 } 2828 }
2828 bit++; 2829 bit++;
2829 } 2830 }
@@ -2916,9 +2917,9 @@ static int dlm_do_migrate_request(struct dlm_ctxt *dlm,
2916 &migrate, sizeof(migrate), nodenum, 2917 &migrate, sizeof(migrate), nodenum,
2917 &status); 2918 &status);
2918 if (ret < 0) { 2919 if (ret < 0) {
2919 mlog(ML_ERROR, "Error %d when sending message %u (key " 2920 mlog(ML_ERROR, "%s: res %.*s, Error %d send "
2920 "0x%x) to node %u\n", ret, DLM_MIGRATE_REQUEST_MSG, 2921 "MIGRATE_REQUEST to node %u\n", dlm->name,
2921 dlm->key, nodenum); 2922 migrate.namelen, migrate.name, ret, nodenum);
2922 if (!dlm_is_host_down(ret)) { 2923 if (!dlm_is_host_down(ret)) {
2923 mlog(ML_ERROR, "unhandled error=%d!\n", ret); 2924 mlog(ML_ERROR, "unhandled error=%d!\n", ret);
2924 BUG(); 2925 BUG();
@@ -2937,7 +2938,7 @@ static int dlm_do_migrate_request(struct dlm_ctxt *dlm,
2937 dlm->name, res->lockname.len, res->lockname.name, 2938 dlm->name, res->lockname.len, res->lockname.name,
2938 nodenum); 2939 nodenum);
2939 spin_lock(&res->spinlock); 2940 spin_lock(&res->spinlock);
2940 dlm_lockres_set_refmap_bit(nodenum, res); 2941 dlm_lockres_set_refmap_bit(dlm, res, nodenum);
2941 spin_unlock(&res->spinlock); 2942 spin_unlock(&res->spinlock);
2942 } 2943 }
2943 } 2944 }
@@ -3271,7 +3272,7 @@ int dlm_finish_migration(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
3271 * mastery reference here since old_master will briefly have 3272 * mastery reference here since old_master will briefly have
3272 * a reference after the migration completes */ 3273 * a reference after the migration completes */
3273 spin_lock(&res->spinlock); 3274 spin_lock(&res->spinlock);
3274 dlm_lockres_set_refmap_bit(old_master, res); 3275 dlm_lockres_set_refmap_bit(dlm, res, old_master);
3275 spin_unlock(&res->spinlock); 3276 spin_unlock(&res->spinlock);
3276 3277
3277 mlog(0, "now time to do a migrate request to other nodes\n"); 3278 mlog(0, "now time to do a migrate request to other nodes\n");
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 7efab6d28a21..01ebfd0bdad7 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -362,40 +362,38 @@ static int dlm_is_node_recovered(struct dlm_ctxt *dlm, u8 node)
362} 362}
363 363
364 364
365int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout) 365void dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout)
366{ 366{
367 if (timeout) { 367 if (dlm_is_node_dead(dlm, node))
368 mlog(ML_NOTICE, "%s: waiting %dms for notification of " 368 return;
369 "death of node %u\n", dlm->name, timeout, node); 369
370 printk(KERN_NOTICE "o2dlm: Waiting on the death of node %u in "
371 "domain %s\n", node, dlm->name);
372
373 if (timeout)
370 wait_event_timeout(dlm->dlm_reco_thread_wq, 374 wait_event_timeout(dlm->dlm_reco_thread_wq,
371 dlm_is_node_dead(dlm, node), 375 dlm_is_node_dead(dlm, node),
372 msecs_to_jiffies(timeout)); 376 msecs_to_jiffies(timeout));
373 } else { 377 else
374 mlog(ML_NOTICE, "%s: waiting indefinitely for notification "
375 "of death of node %u\n", dlm->name, node);
376 wait_event(dlm->dlm_reco_thread_wq, 378 wait_event(dlm->dlm_reco_thread_wq,
377 dlm_is_node_dead(dlm, node)); 379 dlm_is_node_dead(dlm, node));
378 }
379 /* for now, return 0 */
380 return 0;
381} 380}
382 381
383int dlm_wait_for_node_recovery(struct dlm_ctxt *dlm, u8 node, int timeout) 382void dlm_wait_for_node_recovery(struct dlm_ctxt *dlm, u8 node, int timeout)
384{ 383{
385 if (timeout) { 384 if (dlm_is_node_recovered(dlm, node))
386 mlog(0, "%s: waiting %dms for notification of " 385 return;
387 "recovery of node %u\n", dlm->name, timeout, node); 386
387 printk(KERN_NOTICE "o2dlm: Waiting on the recovery of node %u in "
388 "domain %s\n", node, dlm->name);
389
390 if (timeout)
388 wait_event_timeout(dlm->dlm_reco_thread_wq, 391 wait_event_timeout(dlm->dlm_reco_thread_wq,
389 dlm_is_node_recovered(dlm, node), 392 dlm_is_node_recovered(dlm, node),
390 msecs_to_jiffies(timeout)); 393 msecs_to_jiffies(timeout));
391 } else { 394 else
392 mlog(0, "%s: waiting indefinitely for notification "
393 "of recovery of node %u\n", dlm->name, node);
394 wait_event(dlm->dlm_reco_thread_wq, 395 wait_event(dlm->dlm_reco_thread_wq,
395 dlm_is_node_recovered(dlm, node)); 396 dlm_is_node_recovered(dlm, node));
396 }
397 /* for now, return 0 */
398 return 0;
399} 397}
400 398
401/* callers of the top-level api calls (dlmlock/dlmunlock) should 399/* callers of the top-level api calls (dlmlock/dlmunlock) should
@@ -430,6 +428,8 @@ static void dlm_begin_recovery(struct dlm_ctxt *dlm)
430{ 428{
431 spin_lock(&dlm->spinlock); 429 spin_lock(&dlm->spinlock);
432 BUG_ON(dlm->reco.state & DLM_RECO_STATE_ACTIVE); 430 BUG_ON(dlm->reco.state & DLM_RECO_STATE_ACTIVE);
431 printk(KERN_NOTICE "o2dlm: Begin recovery on domain %s for node %u\n",
432 dlm->name, dlm->reco.dead_node);
433 dlm->reco.state |= DLM_RECO_STATE_ACTIVE; 433 dlm->reco.state |= DLM_RECO_STATE_ACTIVE;
434 spin_unlock(&dlm->spinlock); 434 spin_unlock(&dlm->spinlock);
435} 435}
@@ -440,9 +440,18 @@ static void dlm_end_recovery(struct dlm_ctxt *dlm)
440 BUG_ON(!(dlm->reco.state & DLM_RECO_STATE_ACTIVE)); 440 BUG_ON(!(dlm->reco.state & DLM_RECO_STATE_ACTIVE));
441 dlm->reco.state &= ~DLM_RECO_STATE_ACTIVE; 441 dlm->reco.state &= ~DLM_RECO_STATE_ACTIVE;
442 spin_unlock(&dlm->spinlock); 442 spin_unlock(&dlm->spinlock);
443 printk(KERN_NOTICE "o2dlm: End recovery on domain %s\n", dlm->name);
443 wake_up(&dlm->reco.event); 444 wake_up(&dlm->reco.event);
444} 445}
445 446
447static void dlm_print_recovery_master(struct dlm_ctxt *dlm)
448{
449 printk(KERN_NOTICE "o2dlm: Node %u (%s) is the Recovery Master for the "
450 "dead node %u in domain %s\n", dlm->reco.new_master,
451 (dlm->node_num == dlm->reco.new_master ? "me" : "he"),
452 dlm->reco.dead_node, dlm->name);
453}
454
446static int dlm_do_recovery(struct dlm_ctxt *dlm) 455static int dlm_do_recovery(struct dlm_ctxt *dlm)
447{ 456{
448 int status = 0; 457 int status = 0;
@@ -505,9 +514,8 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
505 } 514 }
506 mlog(0, "another node will master this recovery session.\n"); 515 mlog(0, "another node will master this recovery session.\n");
507 } 516 }
508 mlog(0, "dlm=%s (%d), new_master=%u, this node=%u, dead_node=%u\n", 517
509 dlm->name, task_pid_nr(dlm->dlm_reco_thread_task), dlm->reco.new_master, 518 dlm_print_recovery_master(dlm);
510 dlm->node_num, dlm->reco.dead_node);
511 519
512 /* it is safe to start everything back up here 520 /* it is safe to start everything back up here
513 * because all of the dead node's lock resources 521 * because all of the dead node's lock resources
@@ -518,15 +526,13 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
518 return 0; 526 return 0;
519 527
520master_here: 528master_here:
521 mlog(ML_NOTICE, "(%d) Node %u is the Recovery Master for the Dead Node " 529 dlm_print_recovery_master(dlm);
522 "%u for Domain %s\n", task_pid_nr(dlm->dlm_reco_thread_task),
523 dlm->node_num, dlm->reco.dead_node, dlm->name);
524 530
525 status = dlm_remaster_locks(dlm, dlm->reco.dead_node); 531 status = dlm_remaster_locks(dlm, dlm->reco.dead_node);
526 if (status < 0) { 532 if (status < 0) {
527 /* we should never hit this anymore */ 533 /* we should never hit this anymore */
528 mlog(ML_ERROR, "error %d remastering locks for node %u, " 534 mlog(ML_ERROR, "%s: Error %d remastering locks for node %u, "
529 "retrying.\n", status, dlm->reco.dead_node); 535 "retrying.\n", dlm->name, status, dlm->reco.dead_node);
530 /* yield a bit to allow any final network messages 536 /* yield a bit to allow any final network messages
531 * to get handled on remaining nodes */ 537 * to get handled on remaining nodes */
532 msleep(100); 538 msleep(100);
@@ -567,7 +573,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
567 BUG_ON(ndata->state != DLM_RECO_NODE_DATA_INIT); 573 BUG_ON(ndata->state != DLM_RECO_NODE_DATA_INIT);
568 ndata->state = DLM_RECO_NODE_DATA_REQUESTING; 574 ndata->state = DLM_RECO_NODE_DATA_REQUESTING;
569 575
570 mlog(0, "requesting lock info from node %u\n", 576 mlog(0, "%s: Requesting lock info from node %u\n", dlm->name,
571 ndata->node_num); 577 ndata->node_num);
572 578
573 if (ndata->node_num == dlm->node_num) { 579 if (ndata->node_num == dlm->node_num) {
@@ -640,7 +646,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
640 spin_unlock(&dlm_reco_state_lock); 646 spin_unlock(&dlm_reco_state_lock);
641 } 647 }
642 648
643 mlog(0, "done requesting all lock info\n"); 649 mlog(0, "%s: Done requesting all lock info\n", dlm->name);
644 650
645 /* nodes should be sending reco data now 651 /* nodes should be sending reco data now
646 * just need to wait */ 652 * just need to wait */
@@ -802,10 +808,9 @@ static int dlm_request_all_locks(struct dlm_ctxt *dlm, u8 request_from,
802 808
803 /* negative status is handled by caller */ 809 /* negative status is handled by caller */
804 if (ret < 0) 810 if (ret < 0)
805 mlog(ML_ERROR, "Error %d when sending message %u (key " 811 mlog(ML_ERROR, "%s: Error %d send LOCK_REQUEST to node %u "
806 "0x%x) to node %u\n", ret, DLM_LOCK_REQUEST_MSG, 812 "to recover dead node %u\n", dlm->name, ret,
807 dlm->key, request_from); 813 request_from, dead_node);
808
809 // return from here, then 814 // return from here, then
810 // sleep until all received or error 815 // sleep until all received or error
811 return ret; 816 return ret;
@@ -956,9 +961,9 @@ static int dlm_send_all_done_msg(struct dlm_ctxt *dlm, u8 dead_node, u8 send_to)
956 ret = o2net_send_message(DLM_RECO_DATA_DONE_MSG, dlm->key, &done_msg, 961 ret = o2net_send_message(DLM_RECO_DATA_DONE_MSG, dlm->key, &done_msg,
957 sizeof(done_msg), send_to, &tmpret); 962 sizeof(done_msg), send_to, &tmpret);
958 if (ret < 0) { 963 if (ret < 0) {
959 mlog(ML_ERROR, "Error %d when sending message %u (key " 964 mlog(ML_ERROR, "%s: Error %d send RECO_DATA_DONE to node %u "
960 "0x%x) to node %u\n", ret, DLM_RECO_DATA_DONE_MSG, 965 "to recover dead node %u\n", dlm->name, ret, send_to,
961 dlm->key, send_to); 966 dead_node);
962 if (!dlm_is_host_down(ret)) { 967 if (!dlm_is_host_down(ret)) {
963 BUG(); 968 BUG();
964 } 969 }
@@ -1127,9 +1132,11 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm,
1127 if (ret < 0) { 1132 if (ret < 0) {
1128 /* XXX: negative status is not handled. 1133 /* XXX: negative status is not handled.
1129 * this will end up killing this node. */ 1134 * this will end up killing this node. */
1130 mlog(ML_ERROR, "Error %d when sending message %u (key " 1135 mlog(ML_ERROR, "%s: res %.*s, Error %d send MIG_LOCKRES to "
1131 "0x%x) to node %u\n", ret, DLM_MIG_LOCKRES_MSG, 1136 "node %u (%s)\n", dlm->name, mres->lockname_len,
1132 dlm->key, send_to); 1137 mres->lockname, ret, send_to,
1138 (orig_flags & DLM_MRES_MIGRATION ?
1139 "migration" : "recovery"));
1133 } else { 1140 } else {
1134 /* might get an -ENOMEM back here */ 1141 /* might get an -ENOMEM back here */
1135 ret = status; 1142 ret = status;
@@ -1767,7 +1774,7 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
1767 dlm->name, mres->lockname_len, mres->lockname, 1774 dlm->name, mres->lockname_len, mres->lockname,
1768 from); 1775 from);
1769 spin_lock(&res->spinlock); 1776 spin_lock(&res->spinlock);
1770 dlm_lockres_set_refmap_bit(from, res); 1777 dlm_lockres_set_refmap_bit(dlm, res, from);
1771 spin_unlock(&res->spinlock); 1778 spin_unlock(&res->spinlock);
1772 added++; 1779 added++;
1773 break; 1780 break;
@@ -1965,7 +1972,7 @@ skip_lvb:
1965 mlog(0, "%s:%.*s: added lock for node %u, " 1972 mlog(0, "%s:%.*s: added lock for node %u, "
1966 "setting refmap bit\n", dlm->name, 1973 "setting refmap bit\n", dlm->name,
1967 res->lockname.len, res->lockname.name, ml->node); 1974 res->lockname.len, res->lockname.name, ml->node);
1968 dlm_lockres_set_refmap_bit(ml->node, res); 1975 dlm_lockres_set_refmap_bit(dlm, res, ml->node);
1969 added++; 1976 added++;
1970 } 1977 }
1971 spin_unlock(&res->spinlock); 1978 spin_unlock(&res->spinlock);
@@ -2084,6 +2091,9 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm,
2084 2091
2085 list_for_each_entry_safe(res, next, &dlm->reco.resources, recovering) { 2092 list_for_each_entry_safe(res, next, &dlm->reco.resources, recovering) {
2086 if (res->owner == dead_node) { 2093 if (res->owner == dead_node) {
2094 mlog(0, "%s: res %.*s, Changing owner from %u to %u\n",
2095 dlm->name, res->lockname.len, res->lockname.name,
2096 res->owner, new_master);
2087 list_del_init(&res->recovering); 2097 list_del_init(&res->recovering);
2088 spin_lock(&res->spinlock); 2098 spin_lock(&res->spinlock);
2089 /* new_master has our reference from 2099 /* new_master has our reference from
@@ -2105,40 +2115,30 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm,
2105 for (i = 0; i < DLM_HASH_BUCKETS; i++) { 2115 for (i = 0; i < DLM_HASH_BUCKETS; i++) {
2106 bucket = dlm_lockres_hash(dlm, i); 2116 bucket = dlm_lockres_hash(dlm, i);
2107 hlist_for_each_entry(res, hash_iter, bucket, hash_node) { 2117 hlist_for_each_entry(res, hash_iter, bucket, hash_node) {
2108 if (res->state & DLM_LOCK_RES_RECOVERING) { 2118 if (!(res->state & DLM_LOCK_RES_RECOVERING))
2109 if (res->owner == dead_node) { 2119 continue;
2110 mlog(0, "(this=%u) res %.*s owner=%u "
2111 "was not on recovering list, but "
2112 "clearing state anyway\n",
2113 dlm->node_num, res->lockname.len,
2114 res->lockname.name, new_master);
2115 } else if (res->owner == dlm->node_num) {
2116 mlog(0, "(this=%u) res %.*s owner=%u "
2117 "was not on recovering list, "
2118 "owner is THIS node, clearing\n",
2119 dlm->node_num, res->lockname.len,
2120 res->lockname.name, new_master);
2121 } else
2122 continue;
2123 2120
2124 if (!list_empty(&res->recovering)) { 2121 if (res->owner != dead_node &&
2125 mlog(0, "%s:%.*s: lockres was " 2122 res->owner != dlm->node_num)
2126 "marked RECOVERING, owner=%u\n", 2123 continue;
2127 dlm->name, res->lockname.len, 2124
2128 res->lockname.name, res->owner); 2125 if (!list_empty(&res->recovering)) {
2129 list_del_init(&res->recovering); 2126 list_del_init(&res->recovering);
2130 dlm_lockres_put(res); 2127 dlm_lockres_put(res);
2131 }
2132 spin_lock(&res->spinlock);
2133 /* new_master has our reference from
2134 * the lock state sent during recovery */
2135 dlm_change_lockres_owner(dlm, res, new_master);
2136 res->state &= ~DLM_LOCK_RES_RECOVERING;
2137 if (__dlm_lockres_has_locks(res))
2138 __dlm_dirty_lockres(dlm, res);
2139 spin_unlock(&res->spinlock);
2140 wake_up(&res->wq);
2141 } 2128 }
2129
2130 /* new_master has our reference from
2131 * the lock state sent during recovery */
2132 mlog(0, "%s: res %.*s, Changing owner from %u to %u\n",
2133 dlm->name, res->lockname.len, res->lockname.name,
2134 res->owner, new_master);
2135 spin_lock(&res->spinlock);
2136 dlm_change_lockres_owner(dlm, res, new_master);
2137 res->state &= ~DLM_LOCK_RES_RECOVERING;
2138 if (__dlm_lockres_has_locks(res))
2139 __dlm_dirty_lockres(dlm, res);
2140 spin_unlock(&res->spinlock);
2141 wake_up(&res->wq);
2142 } 2142 }
2143 } 2143 }
2144} 2144}
@@ -2252,12 +2252,12 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
2252 res->lockname.len, res->lockname.name, freed, dead_node); 2252 res->lockname.len, res->lockname.name, freed, dead_node);
2253 __dlm_print_one_lock_resource(res); 2253 __dlm_print_one_lock_resource(res);
2254 } 2254 }
2255 dlm_lockres_clear_refmap_bit(dead_node, res); 2255 dlm_lockres_clear_refmap_bit(dlm, res, dead_node);
2256 } else if (test_bit(dead_node, res->refmap)) { 2256 } else if (test_bit(dead_node, res->refmap)) {
2257 mlog(0, "%s:%.*s: dead node %u had a ref, but had " 2257 mlog(0, "%s:%.*s: dead node %u had a ref, but had "
2258 "no locks and had not purged before dying\n", dlm->name, 2258 "no locks and had not purged before dying\n", dlm->name,
2259 res->lockname.len, res->lockname.name, dead_node); 2259 res->lockname.len, res->lockname.name, dead_node);
2260 dlm_lockres_clear_refmap_bit(dead_node, res); 2260 dlm_lockres_clear_refmap_bit(dlm, res, dead_node);
2261 } 2261 }
2262 2262
2263 /* do not kick thread yet */ 2263 /* do not kick thread yet */
@@ -2324,9 +2324,9 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
2324 dlm_revalidate_lvb(dlm, res, dead_node); 2324 dlm_revalidate_lvb(dlm, res, dead_node);
2325 if (res->owner == dead_node) { 2325 if (res->owner == dead_node) {
2326 if (res->state & DLM_LOCK_RES_DROPPING_REF) { 2326 if (res->state & DLM_LOCK_RES_DROPPING_REF) {
2327 mlog(ML_NOTICE, "Ignore %.*s for " 2327 mlog(ML_NOTICE, "%s: res %.*s, Skip "
2328 "recovery as it is being freed\n", 2328 "recovery as it is being freed\n",
2329 res->lockname.len, 2329 dlm->name, res->lockname.len,
2330 res->lockname.name); 2330 res->lockname.name);
2331 } else 2331 } else
2332 dlm_move_lockres_to_recovery_list(dlm, 2332 dlm_move_lockres_to_recovery_list(dlm,
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index 1d6d1d22c471..e73c833fc2a1 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -94,24 +94,26 @@ int __dlm_lockres_unused(struct dlm_lock_resource *res)
94{ 94{
95 int bit; 95 int bit;
96 96
97 assert_spin_locked(&res->spinlock);
98
97 if (__dlm_lockres_has_locks(res)) 99 if (__dlm_lockres_has_locks(res))
98 return 0; 100 return 0;
99 101
102 /* Locks are in the process of being created */
103 if (res->inflight_locks)
104 return 0;
105
100 if (!list_empty(&res->dirty) || res->state & DLM_LOCK_RES_DIRTY) 106 if (!list_empty(&res->dirty) || res->state & DLM_LOCK_RES_DIRTY)
101 return 0; 107 return 0;
102 108
103 if (res->state & DLM_LOCK_RES_RECOVERING) 109 if (res->state & DLM_LOCK_RES_RECOVERING)
104 return 0; 110 return 0;
105 111
112 /* Another node has this resource with this node as the master */
106 bit = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); 113 bit = find_next_bit(res->refmap, O2NM_MAX_NODES, 0);
107 if (bit < O2NM_MAX_NODES) 114 if (bit < O2NM_MAX_NODES)
108 return 0; 115 return 0;
109 116
110 /*
111 * since the bit for dlm->node_num is not set, inflight_locks better
112 * be zero
113 */
114 BUG_ON(res->inflight_locks != 0);
115 return 1; 117 return 1;
116} 118}
117 119
@@ -185,8 +187,6 @@ static void dlm_purge_lockres(struct dlm_ctxt *dlm,
185 /* clear our bit from the master's refmap, ignore errors */ 187 /* clear our bit from the master's refmap, ignore errors */
186 ret = dlm_drop_lockres_ref(dlm, res); 188 ret = dlm_drop_lockres_ref(dlm, res);
187 if (ret < 0) { 189 if (ret < 0) {
188 mlog(ML_ERROR, "%s: deref %.*s failed %d\n", dlm->name,
189 res->lockname.len, res->lockname.name, ret);
190 if (!dlm_is_host_down(ret)) 190 if (!dlm_is_host_down(ret))
191 BUG(); 191 BUG();
192 } 192 }
@@ -209,7 +209,7 @@ static void dlm_purge_lockres(struct dlm_ctxt *dlm,
209 BUG(); 209 BUG();
210 } 210 }
211 211
212 __dlm_unhash_lockres(res); 212 __dlm_unhash_lockres(dlm, res);
213 213
214 /* lockres is not in the hash now. drop the flag and wake up 214 /* lockres is not in the hash now. drop the flag and wake up
215 * any processes waiting in dlm_get_lock_resource. */ 215 * any processes waiting in dlm_get_lock_resource. */