aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2
diff options
context:
space:
mode:
authorJoseph Qi <joseph.qi@huawei.com>2014-10-09 18:25:13 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2014-10-09 22:25:47 -0400
commit70e82a12dbfa3acbff41be08a36e8be4578878c9 (patch)
tree2f5462c4d1ded93f1710d8c0d83ae67473402a87 /fs/ocfs2
parent5046f18d5bd9ad7638b32c3b304ff39a74c064df (diff)
ocfs2: fix deadlock between o2hb thread and o2net_wq
The following case may lead to o2net_wq and o2hb thread deadlock on o2hb_callback_sem. Currently there are 2 nodes say N1, N2 in the cluster. And N2 down, at the same time, N3 tries to join the cluster. So N1 will handle node down (N2) and join (N3) simultaneously. o2hb o2net_wq ->o2hb_do_disk_heartbeat ->o2hb_check_slot ->o2hb_run_event_list ->o2hb_fire_callbacks ->down_write(&o2hb_callback_sem) ->o2net_hb_node_down_cb ->flush_workqueue(o2net_wq) ->o2net_process_message ->dlm_query_join_handler ->o2hb_check_node_heartbeating ->o2hb_fill_node_map ->down_read(&o2hb_callback_sem) No need to take o2hb_callback_sem in dlm_query_join_handler, o2hb_live_lock is enough to protect live node map. Signed-off-by: Joseph Qi <joseph.qi@huawei.com> Cc: xMark Fasheh <mfasheh@suse.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: jiangyiwen <jiangyiwen@huawei.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/ocfs2')
-rw-r--r--fs/ocfs2/cluster/heartbeat.c19
-rw-r--r--fs/ocfs2/cluster/heartbeat.h1
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c2
3 files changed, 21 insertions, 1 deletions
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 73039295d0d1..d13385448168 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -2572,6 +2572,25 @@ int o2hb_check_node_heartbeating(u8 node_num)
2572} 2572}
2573EXPORT_SYMBOL_GPL(o2hb_check_node_heartbeating); 2573EXPORT_SYMBOL_GPL(o2hb_check_node_heartbeating);
2574 2574
2575int o2hb_check_node_heartbeating_no_sem(u8 node_num)
2576{
2577 unsigned long testing_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
2578 unsigned long flags;
2579
2580 spin_lock_irqsave(&o2hb_live_lock, flags);
2581 o2hb_fill_node_map_from_callback(testing_map, sizeof(testing_map));
2582 spin_unlock_irqrestore(&o2hb_live_lock, flags);
2583 if (!test_bit(node_num, testing_map)) {
2584 mlog(ML_HEARTBEAT,
2585 "node (%u) does not have heartbeating enabled.\n",
2586 node_num);
2587 return 0;
2588 }
2589
2590 return 1;
2591}
2592EXPORT_SYMBOL_GPL(o2hb_check_node_heartbeating_no_sem);
2593
2575int o2hb_check_node_heartbeating_from_callback(u8 node_num) 2594int o2hb_check_node_heartbeating_from_callback(u8 node_num)
2576{ 2595{
2577 unsigned long testing_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; 2596 unsigned long testing_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
diff --git a/fs/ocfs2/cluster/heartbeat.h b/fs/ocfs2/cluster/heartbeat.h
index 00ad8e8fea51..3ef5137dc362 100644
--- a/fs/ocfs2/cluster/heartbeat.h
+++ b/fs/ocfs2/cluster/heartbeat.h
@@ -80,6 +80,7 @@ void o2hb_fill_node_map(unsigned long *map,
80void o2hb_exit(void); 80void o2hb_exit(void);
81int o2hb_init(void); 81int o2hb_init(void);
82int o2hb_check_node_heartbeating(u8 node_num); 82int o2hb_check_node_heartbeating(u8 node_num);
83int o2hb_check_node_heartbeating_no_sem(u8 node_num);
83int o2hb_check_node_heartbeating_from_callback(u8 node_num); 84int o2hb_check_node_heartbeating_from_callback(u8 node_num);
84int o2hb_check_local_node_heartbeating(void); 85int o2hb_check_local_node_heartbeating(void);
85void o2hb_stop_all_regions(void); 86void o2hb_stop_all_regions(void);
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 257a6dfe3f13..02d315fef432 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -839,7 +839,7 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data,
839 * to back off and try again. This gives heartbeat a chance 839 * to back off and try again. This gives heartbeat a chance
840 * to catch up. 840 * to catch up.
841 */ 841 */
842 if (!o2hb_check_node_heartbeating(query->node_idx)) { 842 if (!o2hb_check_node_heartbeating_no_sem(query->node_idx)) {
843 mlog(0, "node %u is not in our live map yet\n", 843 mlog(0, "node %u is not in our live map yet\n",
844 query->node_idx); 844 query->node_idx);
845 845