diff options
author | Florian Westphal <fw@strlen.de> | 2016-01-20 05:16:43 -0500 |
---|---|---|
committer | Pablo Neira Ayuso <pablo@netfilter.org> | 2016-01-31 18:15:26 -0500 |
commit | d93c6258ee4255749c10012c50a31c08f4e9fb16 (patch) | |
tree | bb2ca281b4a2467572b3541780a0b74088e75de3 /net/netfilter | |
parent | 53729eb174c1589f9185340ffe8c10b3f39f3ef3 (diff) |
netfilter: conntrack: resched in nf_ct_iterate_cleanup
Ulrich reports a soft lockup with the following (shortened) call chain:
NMI watchdog: BUG: soft lockup - CPU#1 stuck for 22s!
__netif_receive_skb_core+0x6e4/0x774
process_backlog+0x94/0x160
net_rx_action+0x88/0x178
call_do_softirq+0x24/0x3c
do_softirq+0x54/0x6c
__local_bh_enable_ip+0x7c/0xbc
nf_ct_iterate_cleanup+0x11c/0x22c [nf_conntrack]
masq_inet_event+0x20/0x30 [nf_nat_masquerade_ipv6]
atomic_notifier_call_chain+0x1c/0x2c
ipv6_del_addr+0x1bc/0x220 [ipv6]
The problem is that nf_ct_iterate_cleanup can run for a very long time
since it can be interrupted by softirq processing.
Moreover, atomic_notifier_call_chain runs with the RCU read lock held.
So let's call cond_resched() in nf_ct_iterate_cleanup and defer
the call to a work queue for the atomic_notifier_call_chain case.
We also need another cond_resched() in get_next_corpse, since we
have to deal with iter() always returning false; in that case
get_next_corpse will walk the entire conntrack table.
Reported-by: Ulrich Weber <uw@ocedo.com>
Tested-by: Ulrich Weber <uw@ocedo.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Diffstat (limited to 'net/netfilter')
-rw-r--r-- | net/netfilter/nf_conntrack_core.c | 5 |
1 files changed, 5 insertions, 0 deletions
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 58882de06bd7..f60b4fdeeb8c 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c | |||
@@ -1412,6 +1412,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data), | |||
1412 | } | 1412 | } |
1413 | spin_unlock(lockp); | 1413 | spin_unlock(lockp); |
1414 | local_bh_enable(); | 1414 | local_bh_enable(); |
1415 | cond_resched(); | ||
1415 | } | 1416 | } |
1416 | 1417 | ||
1417 | for_each_possible_cpu(cpu) { | 1418 | for_each_possible_cpu(cpu) { |
@@ -1424,6 +1425,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data), | |||
1424 | set_bit(IPS_DYING_BIT, &ct->status); | 1425 | set_bit(IPS_DYING_BIT, &ct->status); |
1425 | } | 1426 | } |
1426 | spin_unlock_bh(&pcpu->lock); | 1427 | spin_unlock_bh(&pcpu->lock); |
1428 | cond_resched(); | ||
1427 | } | 1429 | } |
1428 | return NULL; | 1430 | return NULL; |
1429 | found: | 1431 | found: |
@@ -1440,6 +1442,8 @@ void nf_ct_iterate_cleanup(struct net *net, | |||
1440 | struct nf_conn *ct; | 1442 | struct nf_conn *ct; |
1441 | unsigned int bucket = 0; | 1443 | unsigned int bucket = 0; |
1442 | 1444 | ||
1445 | might_sleep(); | ||
1446 | |||
1443 | while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) { | 1447 | while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) { |
1444 | /* Time to push up daises... */ | 1448 | /* Time to push up daises... */ |
1445 | if (del_timer(&ct->timeout)) | 1449 | if (del_timer(&ct->timeout)) |
@@ -1448,6 +1452,7 @@ void nf_ct_iterate_cleanup(struct net *net, | |||
1448 | /* ... else the timer will get him soon. */ | 1452 | /* ... else the timer will get him soon. */ |
1449 | 1453 | ||
1450 | nf_ct_put(ct); | 1454 | nf_ct_put(ct); |
1455 | cond_resched(); | ||
1451 | } | 1456 | } |
1452 | } | 1457 | } |
1453 | EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup); | 1458 | EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup); |