diff options
author | Xue jiufei <xuejiufei@huawei.com> | 2014-10-09 18:28:26 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-10-09 22:25:58 -0400 |
commit | b246d3d11e9c04f76a4fd6aae9c61da82bba0afb (patch) | |
tree | 74f3372f48038954afe5939f03ef096944cf6d05 | |
parent | 934f3072c17cc8886f4c043b47eeeb1b12f8de33 (diff) |
ocfs2: fix a deadlock while o2net_wq doing direct memory reclaim
Fix a deadlock problem caused by direct memory reclaim in o2net_wq. The
situation is as follows:
1) A node receives a connect message from another node and queues a
work_struct o2net_listen_work.
2) o2net_wq processes this work and calls the following functions:
o2net_wq
-> o2net_accept_one
-> sock_create_lite
-> sock_alloc()
-> kmem_cache_alloc with GFP_KERNEL
-> ____cache_alloc_node
-> __alloc_pages_nodemask
-> do_try_to_free_pages
-> shrink_slab
-> evict
-> ocfs2_evict_inode
-> ocfs2_drop_lock
-> dlmunlock
-> o2net_send_message_vec
Then o2net_wq waits for the unlock reply from the master.
3) The TCP layer receives the reply, calls o2net_data_ready() and queues
sc_rx_work, waiting for o2net_wq to process this work.
4) o2net_wq is a single-threaded workqueue, so it processes work items one
by one. Right now it is still doing o2net_listen_work and cannot handle
sc_rx_work, so we deadlock.
Junxiao Bi's patch "mm: clear __GFP_FS when PF_MEMALLOC_NOIO is set"
(http://ozlabs.org/~akpm/mmots/broken-out/mm-clear-__gfp_fs-when-pf_memalloc_noio-is-set.patch)
clears __GFP_FS in addition to __GFP_IO in memalloc_noio_flags(). We use
memalloc_noio_save() to set the process flag PF_MEMALLOC_NOIO so that all
allocations done by this process are done as if GFP_NOIO was specified.
As a result, we do not reenter the filesystem while doing memory reclaim.
Signed-off-by: joyce.xue <xuejiufei@huawei.com>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Mark Fasheh <mfasheh@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | fs/ocfs2/cluster/tcp.c | 20 |
1 files changed, 20 insertions, 0 deletions
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 509e6d5415e2..97de0fbd9f78 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c | |||
@@ -1601,7 +1601,15 @@ static void o2net_start_connect(struct work_struct *work) | |||
1601 | struct sockaddr_in myaddr = {0, }, remoteaddr = {0, }; | 1601 | struct sockaddr_in myaddr = {0, }, remoteaddr = {0, }; |
1602 | int ret = 0, stop; | 1602 | int ret = 0, stop; |
1603 | unsigned int timeout; | 1603 | unsigned int timeout; |
1604 | unsigned int noio_flag; | ||
1604 | 1605 | ||
1606 | /* | ||
1607 | * sock_create allocates the sock with GFP_KERNEL. We must set | ||
1608 | * per-process flag PF_MEMALLOC_NOIO so that all allocations done | ||
1609 | * by this process are done as if GFP_NOIO was specified. So we | ||
1610 | * are not reentering filesystem while doing memory reclaim. | ||
1611 | */ | ||
1612 | noio_flag = memalloc_noio_save(); | ||
1605 | /* if we're greater we initiate tx, otherwise we accept */ | 1613 | /* if we're greater we initiate tx, otherwise we accept */ |
1606 | if (o2nm_this_node() <= o2net_num_from_nn(nn)) | 1614 | if (o2nm_this_node() <= o2net_num_from_nn(nn)) |
1607 | goto out; | 1615 | goto out; |
@@ -1710,6 +1718,7 @@ out: | |||
1710 | if (mynode) | 1718 | if (mynode) |
1711 | o2nm_node_put(mynode); | 1719 | o2nm_node_put(mynode); |
1712 | 1720 | ||
1721 | memalloc_noio_restore(noio_flag); | ||
1713 | return; | 1722 | return; |
1714 | } | 1723 | } |
1715 | 1724 | ||
@@ -1836,6 +1845,15 @@ static int o2net_accept_one(struct socket *sock, int *more) | |||
1836 | struct o2nm_node *local_node = NULL; | 1845 | struct o2nm_node *local_node = NULL; |
1837 | struct o2net_sock_container *sc = NULL; | 1846 | struct o2net_sock_container *sc = NULL; |
1838 | struct o2net_node *nn; | 1847 | struct o2net_node *nn; |
1848 | unsigned int noio_flag; | ||
1849 | |||
1850 | /* | ||
1851 | * sock_create_lite allocates the sock with GFP_KERNEL. We must set | ||
1852 | * per-process flag PF_MEMALLOC_NOIO so that all allocations done | ||
1853 | * by this process are done as if GFP_NOIO was specified. So we | ||
1854 | * are not reentering filesystem while doing memory reclaim. | ||
1855 | */ | ||
1856 | noio_flag = memalloc_noio_save(); | ||
1839 | 1857 | ||
1840 | BUG_ON(sock == NULL); | 1858 | BUG_ON(sock == NULL); |
1841 | *more = 0; | 1859 | *more = 0; |
@@ -1952,6 +1970,8 @@ out: | |||
1952 | o2nm_node_put(local_node); | 1970 | o2nm_node_put(local_node); |
1953 | if (sc) | 1971 | if (sc) |
1954 | sc_put(sc); | 1972 | sc_put(sc); |
1973 | |||
1974 | memalloc_noio_restore(noio_flag); | ||
1955 | return ret; | 1975 | return ret; |
1956 | } | 1976 | } |
1957 | 1977 | ||