diff options
author | Joseph Qi <joseph.qi@huawei.com> | 2015-11-05 21:44:07 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-11-05 22:34:48 -0500 |
commit | 0986fe9b50f425ec81f25a1a85aaf3574b31d801 (patch) | |
tree | f8308dd1893755a7dcb852126efcb3625ea5e113 /fs | |
parent | 93d911fcce259a3f950ee20592beee31b855cd96 (diff) |
ocfs2: fix race between mount and delete node/cluster
There is a race between mount and delete node/cluster, which can
send o2hb_thread into a malfunctioning dead loop.
o2hb_thread
{
o2nm_depend_this_node();
<<<<<< race window: the node may already have been deleted; if the
thread then enters the loop, it will malfunction because no
configured nodes are found.
while (!kthread_should_stop() &&
!reg->hr_unclean_stop && !reg->hr_aborted_start) {
}
So the return value of o2nm_depend_this_node() needs to be checked. If the
node has been deleted, do not enter the loop, and let the mount fail.
Signed-off-by: Joseph Qi <joseph.qi@huawei.com>
Cc: Mark Fasheh <mfasheh@suse.com>
Cc: Joel Becker <jlbec@evilplan.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/ocfs2/cluster/heartbeat.c | 19 |
1 files changed, 16 insertions, 3 deletions
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index fa15debcc02b..ddddef0021a0 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c | |||
@@ -219,7 +219,8 @@ struct o2hb_region { | |||
219 | unsigned hr_unclean_stop:1, | 219 | unsigned hr_unclean_stop:1, |
220 | hr_aborted_start:1, | 220 | hr_aborted_start:1, |
221 | hr_item_pinned:1, | 221 | hr_item_pinned:1, |
222 | hr_item_dropped:1; | 222 | hr_item_dropped:1, |
223 | hr_node_deleted:1; | ||
223 | 224 | ||
224 | /* protected by the hr_callback_sem */ | 225 | /* protected by the hr_callback_sem */ |
225 | struct task_struct *hr_task; | 226 | struct task_struct *hr_task; |
@@ -1078,7 +1079,13 @@ static int o2hb_thread(void *data) | |||
1078 | set_user_nice(current, MIN_NICE); | 1079 | set_user_nice(current, MIN_NICE); |
1079 | 1080 | ||
1080 | /* Pin node */ | 1081 | /* Pin node */ |
1081 | o2nm_depend_this_node(); | 1082 | ret = o2nm_depend_this_node(); |
1083 | if (ret) { | ||
1084 | mlog(ML_ERROR, "Node has been deleted, ret = %d\n", ret); | ||
1085 | reg->hr_node_deleted = 1; | ||
1086 | wake_up(&o2hb_steady_queue); | ||
1087 | return 0; | ||
1088 | } | ||
1082 | 1089 | ||
1083 | while (!kthread_should_stop() && | 1090 | while (!kthread_should_stop() && |
1084 | !reg->hr_unclean_stop && !reg->hr_aborted_start) { | 1091 | !reg->hr_unclean_stop && !reg->hr_aborted_start) { |
@@ -1787,7 +1794,8 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, | |||
1787 | spin_unlock(&o2hb_live_lock); | 1794 | spin_unlock(&o2hb_live_lock); |
1788 | 1795 | ||
1789 | ret = wait_event_interruptible(o2hb_steady_queue, | 1796 | ret = wait_event_interruptible(o2hb_steady_queue, |
1790 | atomic_read(&reg->hr_steady_iterations) == 0); | 1797 | atomic_read(&reg->hr_steady_iterations) == 0 || |
1798 | reg->hr_node_deleted); | ||
1791 | if (ret) { | 1799 | if (ret) { |
1792 | atomic_set(&reg->hr_steady_iterations, 0); | 1800 | atomic_set(&reg->hr_steady_iterations, 0); |
1793 | reg->hr_aborted_start = 1; | 1801 | reg->hr_aborted_start = 1; |
@@ -1798,6 +1806,11 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, | |||
1798 | goto out3; | 1806 | goto out3; |
1799 | } | 1807 | } |
1800 | 1808 | ||
1809 | if (reg->hr_node_deleted) { | ||
1810 | ret = -EINVAL; | ||
1811 | goto out3; | ||
1812 | } | ||
1813 | |||
1801 | /* Ok, we were woken. Make sure it wasn't by drop_item() */ | 1814 | /* Ok, we were woken. Make sure it wasn't by drop_item() */ |
1802 | spin_lock(&o2hb_live_lock); | 1815 | spin_lock(&o2hb_live_lock); |
1803 | hb_task = reg->hr_task; | 1816 | hb_task = reg->hr_task; |