about summary refs log tree commit diff stats
path: root/fs/dlm
diff options
context:
space:
mode:
author tsutomu.owa@toshiba.co.jp <tsutomu.owa@toshiba.co.jp> 2017-09-12 04:55:40 -0400
committer David Teigland <teigland@redhat.com> 2017-09-25 13:45:21 -0400
commit f0fb83cb9201a9f272f8ac771eed6b1e5745375c (patch)
tree fc5803156275c3acf21f1c1121bacfedb2ccb2dd /fs/dlm
parent 988419a9deab68035364d8163bc27adb694ab28e (diff)
DLM: fix double list_del()
dlm_lowcomms_stop() was not functioning properly. To stop correctly, we have to wait until all processing on send_workqueue and recv_workqueue has finished. This problem causes the following issue. The scenario is:

1. dlm_send thread: send_to_sock refers to con->writequeue
2. main thread: dlm_lowcomms_stop calls list_del
3. dlm_send thread: send_to_sock calls list_del in writequeue_entry_complete

[ 1925.770305] dlm: canceled swork for node 4
[ 1925.772374] general protection fault: 0000 [#1] SMP
[ 1925.777930] Modules linked in: ocfs2_stack_user ocfs2 ocfs2_nodemanager ocfs2_stackglue dlm fmxnet(O) fmx_api(O) fmx_cu(O) igb(O) kvm_intel kvm irqbypass autofs4
[ 1925.794131] CPU: 3 PID: 6994 Comm: kworker/u8:0 Tainted: G O 4.4.39 #1
[ 1925.802684] Hardware name: TOSHIBA OX/OX, BIOS OX-P0015 12/03/2015
[ 1925.809595] Workqueue: dlm_send process_send_sockets [dlm]
[ 1925.815714] task: ffff8804398d3c00 ti: ffff88046910c000 task.ti: ffff88046910c000
[ 1925.824072] RIP: 0010:[<ffffffffa04bd158>] [<ffffffffa04bd158>] process_send_sockets+0xf8/0x280 [dlm]
[ 1925.834480] RSP: 0018:ffff88046910fde0 EFLAGS: 00010246
[ 1925.840411] RAX: dead000000000200 RBX: 0000000000000001 RCX: 000000000000000a
[ 1925.848372] RDX: ffff88046bd980c0 RSI: 0000000000000000 RDI: ffff8804673c5670
[ 1925.856341] RBP: ffff88046910fe20 R08: 00000000000000c9 R09: 0000000000000010
[ 1925.864311] R10: ffffffff81e22fc0 R11: 0000000000000000 R12: ffff8804673c56d8
[ 1925.872281] R13: ffff8804673c5660 R14: ffff88046bd98440 R15: 0000000000000058
[ 1925.880251] FS: 0000000000000000(0000) GS:ffff88047fd80000(0000) knlGS:0000000000000000
[ 1925.889280] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[ 1925.895694] CR2: 00007fff09eadf58 CR3: 00000004690f5000 CR4: 00000000001006e0
[ 1925.903663] Stack:
[ 1925.905903] ffff8804673c5630 ffff8804673c5620 ffff8804673c5670 ffff88007d219b40
[ 1925.914181] ffff88046f095800 0000000000000100 ffff8800717a1400 ffff8804673c56d8
[ 1925.922459] ffff88046910fe60 ffffffff81073db2 00ff880400000000 ffff88007d219b40
[ 1925.930736] Call Trace:
[ 1925.933468] [<ffffffff81073db2>] process_one_work+0x162/0x450
[ 1925.939983] [<ffffffff81074459>] worker_thread+0x69/0x4a0
[ 1925.946109] [<ffffffff810743f0>] ? rescuer_thread+0x350/0x350
[ 1925.952622] [<ffffffff8107956f>] kthread+0xef/0x110
[ 1925.958165] [<ffffffff81079480>] ? kthread_park+0x60/0x60
[ 1925.964283] [<ffffffff8186ab2f>] ret_from_fork+0x3f/0x70
[ 1925.970312] [<ffffffff81079480>] ? kthread_park+0x60/0x60
[ 1925.976436] Code: 01 00 00 48 8b 7d d0 e8 07 d3 3a e1 45 01 7e 18 45 29 7e 1c 75 ab 41 8b 46 24 85 c0 75 a3 49 8b 16 49 8b 46 08 31 f6 48 89 42 08 <48> 89 10 48 b8 00 01 00 00 00 00 ad de 49 8b 7e 10 49 89 06 66
[ 1925.997791] RIP [<ffffffffa04bd158>] process_send_sockets+0xf8/0x280 [dlm]
[ 1926.005577] RSP <ffff88046910fde0>

Signed-off-by: Tadashi Miyauchi <miyauchi@toshiba-tops.co.jp>
Signed-off-by: Tsutomu Owa <tsutomu.owa@toshiba.co.jp>
Signed-off-by: David Teigland <teigland@redhat.com>
Diffstat (limited to 'fs/dlm')
-rw-r--r-- fs/dlm/lowcomms.c 44
1 files changed, 39 insertions, 5 deletions
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 72247cb4bc5e..980c58befd53 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1628,11 +1628,20 @@ static int work_start(void)
1628 return 0; 1628 return 0;
1629} 1629}
1630 1630
1631static void stop_conn(struct connection *con) 1631static void _stop_conn(struct connection *con, bool and_other)
1632{ 1632{
1633 con->flags |= 0x0F; 1633 mutex_lock(&con->sock_mutex);
1634 set_bit(CF_READ_PENDING, &con->flags);
1634 if (con->sock && con->sock->sk) 1635 if (con->sock && con->sock->sk)
1635 con->sock->sk->sk_user_data = NULL; 1636 con->sock->sk->sk_user_data = NULL;
1637 if (con->othercon && and_other)
1638 _stop_conn(con->othercon, false);
1639 mutex_unlock(&con->sock_mutex);
1640}
1641
1642static void stop_conn(struct connection *con)
1643{
1644 _stop_conn(con, true);
1636} 1645}
1637 1646
1638static void free_conn(struct connection *con) 1647static void free_conn(struct connection *con)
@@ -1644,6 +1653,32 @@ static void free_conn(struct connection *con)
1644 kmem_cache_free(con_cache, con); 1653 kmem_cache_free(con_cache, con);
1645} 1654}
1646 1655
1656static void work_flush(void)
1657{
1658 int ok;
1659 int i;
1660 struct hlist_node *n;
1661 struct connection *con;
1662
1663 flush_workqueue(recv_workqueue);
1664 flush_workqueue(send_workqueue);
1665 do {
1666 ok = 1;
1667 foreach_conn(stop_conn);
1668 flush_workqueue(recv_workqueue);
1669 flush_workqueue(send_workqueue);
1670 for (i = 0; i < CONN_HASH_SIZE && ok; i++) {
1671 hlist_for_each_entry_safe(con, n,
1672 &connection_hash[i], list) {
1673 ok &= test_bit(CF_READ_PENDING, &con->flags);
1674 if (con->othercon)
1675 ok &= test_bit(CF_READ_PENDING,
1676 &con->othercon->flags);
1677 }
1678 }
1679 } while (!ok);
1680}
1681
1647void dlm_lowcomms_stop(void) 1682void dlm_lowcomms_stop(void)
1648{ 1683{
1649 /* Set all the flags to prevent any 1684 /* Set all the flags to prevent any
@@ -1651,11 +1686,10 @@ void dlm_lowcomms_stop(void)
1651 */ 1686 */
1652 mutex_lock(&connections_lock); 1687 mutex_lock(&connections_lock);
1653 dlm_allow_conn = 0; 1688 dlm_allow_conn = 0;
1654 foreach_conn(stop_conn); 1689 mutex_unlock(&connections_lock);
1690 work_flush();
1655 clean_writequeues(); 1691 clean_writequeues();
1656 foreach_conn(free_conn); 1692 foreach_conn(free_conn);
1657 mutex_unlock(&connections_lock);
1658
1659 work_stop(); 1693 work_stop();
1660 1694
1661 kmem_cache_destroy(con_cache); 1695 kmem_cache_destroy(con_cache);