aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorJoseph Qi <joseph.qi@huawei.com>2015-11-05 21:44:07 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2015-11-05 22:34:48 -0500
commit0986fe9b50f425ec81f25a1a85aaf3574b31d801 (patch)
treef8308dd1893755a7dcb852126efcb3625ea5e113 /fs
parent93d911fcce259a3f950ee20592beee31b855cd96 (diff)
ocfs2: fix race between mount and delete node/cluster
There is a race between mount and delete node/cluster, which can leave o2hb_thread stuck in a malfunctioning dead loop.

o2hb_thread
{
    o2nm_depend_this_node();
    <<<<<< race window: the node may have already been deleted, and the thread then enters the loop; the o2hb thread will malfunction because no configured nodes are found.
    while (!kthread_should_stop() && !reg->hr_unclean_stop && !reg->hr_aborted_start) {
}

So the return value of o2nm_depend_this_node() needs to be checked. If the node has been deleted, do not enter the loop and let the mount fail.

Signed-off-by: Joseph Qi <joseph.qi@huawei.com>
Cc: Mark Fasheh <mfasheh@suse.com>
Cc: Joel Becker <jlbec@evilplan.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs')
-rw-r--r--fs/ocfs2/cluster/heartbeat.c19
1 file changed, 16 insertions, 3 deletions
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index fa15debcc02b..ddddef0021a0 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -219,7 +219,8 @@ struct o2hb_region {
219 unsigned hr_unclean_stop:1, 219 unsigned hr_unclean_stop:1,
220 hr_aborted_start:1, 220 hr_aborted_start:1,
221 hr_item_pinned:1, 221 hr_item_pinned:1,
222 hr_item_dropped:1; 222 hr_item_dropped:1,
223 hr_node_deleted:1;
223 224
224 /* protected by the hr_callback_sem */ 225 /* protected by the hr_callback_sem */
225 struct task_struct *hr_task; 226 struct task_struct *hr_task;
@@ -1078,7 +1079,13 @@ static int o2hb_thread(void *data)
1078 set_user_nice(current, MIN_NICE); 1079 set_user_nice(current, MIN_NICE);
1079 1080
1080 /* Pin node */ 1081 /* Pin node */
1081 o2nm_depend_this_node(); 1082 ret = o2nm_depend_this_node();
1083 if (ret) {
1084 mlog(ML_ERROR, "Node has been deleted, ret = %d\n", ret);
1085 reg->hr_node_deleted = 1;
1086 wake_up(&o2hb_steady_queue);
1087 return 0;
1088 }
1082 1089
1083 while (!kthread_should_stop() && 1090 while (!kthread_should_stop() &&
1084 !reg->hr_unclean_stop && !reg->hr_aborted_start) { 1091 !reg->hr_unclean_stop && !reg->hr_aborted_start) {
@@ -1787,7 +1794,8 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
1787 spin_unlock(&o2hb_live_lock); 1794 spin_unlock(&o2hb_live_lock);
1788 1795
1789 ret = wait_event_interruptible(o2hb_steady_queue, 1796 ret = wait_event_interruptible(o2hb_steady_queue,
1790 atomic_read(&reg->hr_steady_iterations) == 0); 1797 atomic_read(&reg->hr_steady_iterations) == 0 ||
1798 reg->hr_node_deleted);
1791 if (ret) { 1799 if (ret) {
1792 atomic_set(&reg->hr_steady_iterations, 0); 1800 atomic_set(&reg->hr_steady_iterations, 0);
1793 reg->hr_aborted_start = 1; 1801 reg->hr_aborted_start = 1;
@@ -1798,6 +1806,11 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
1798 goto out3; 1806 goto out3;
1799 } 1807 }
1800 1808
1809 if (reg->hr_node_deleted) {
1810 ret = -EINVAL;
1811 goto out3;
1812 }
1813
1801 /* Ok, we were woken. Make sure it wasn't by drop_item() */ 1814 /* Ok, we were woken. Make sure it wasn't by drop_item() */
1802 spin_lock(&o2hb_live_lock); 1815 spin_lock(&o2hb_live_lock);
1803 hb_task = reg->hr_task; 1816 hb_task = reg->hr_task;