aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorPablo Neira Ayuso <pablo@netfilter.org>2014-02-03 14:01:53 -0500
committerPablo Neira Ayuso <pablo@netfilter.org>2014-02-05 11:46:06 -0500
commite53376bef2cd97d3e3f61fdc677fb8da7d03d0da (patch)
treebf0938cf7dc39d627a140edf0969ab694b4a5a3a /net
parent829d9315c46a2be57a8fb40c89aeb7db61513d96 (diff)
netfilter: nf_conntrack: don't release a conntrack with non-zero refcnt
With this patch, the conntrack refcount is initially set to zero and it is bumped once it is added to any of the list, so we fulfill Eric's golden rule which is that all released objects always have a refcount that equals zero. Andrey Vagin reports that nf_conntrack_free can't be called for a conntrack with non-zero ref-counter, because it can race with nf_conntrack_find_get(). A conntrack slab is created with SLAB_DESTROY_BY_RCU. Non-zero ref-counter says that this conntrack is used. So when we release a conntrack with non-zero counter, we break this assumption. CPU1 CPU2 ____nf_conntrack_find() nf_ct_put() destroy_conntrack() ... init_conntrack __nf_conntrack_alloc (set use = 1) atomic_inc_not_zero(&ct->use) (use = 2) if (!l4proto->new(ct, skb, dataoff, timeouts)) nf_conntrack_free(ct); (use = 2 !!!) ... __nf_conntrack_alloc (set use = 1) if (!nf_ct_key_equal(h, tuple, zone)) nf_ct_put(ct); (use = 0) destroy_conntrack() /* continue to work with CT */ After applying the path "[PATCH] netfilter: nf_conntrack: fix RCU race in nf_conntrack_find_get" another bug was triggered in destroy_conntrack(): <4>[67096.759334] ------------[ cut here ]------------ <2>[67096.759353] kernel BUG at net/netfilter/nf_conntrack_core.c:211! ... <4>[67096.759837] Pid: 498649, comm: atdd veid: 666 Tainted: G C --------------- 2.6.32-042stab084.18 #1 042stab084_18 /DQ45CB <4>[67096.759932] RIP: 0010:[<ffffffffa03d99ac>] [<ffffffffa03d99ac>] destroy_conntrack+0x15c/0x190 [nf_conntrack] <4>[67096.760255] Call Trace: <4>[67096.760255] [<ffffffff814844a7>] nf_conntrack_destroy+0x17/0x30 <4>[67096.760255] [<ffffffffa03d9bb5>] nf_conntrack_find_get+0x85/0x130 [nf_conntrack] <4>[67096.760255] [<ffffffffa03d9fb2>] nf_conntrack_in+0x352/0xb60 [nf_conntrack] <4>[67096.760255] [<ffffffffa048c771>] ipv4_conntrack_local+0x51/0x60 [nf_conntrack_ipv4] <4>[67096.760255] [<ffffffff81484419>] nf_iterate+0x69/0xb0 <4>[67096.760255] [<ffffffff814b5b00>] ? dst_output+0x0/0x20 <4>[67096.760255] [<ffffffff814845d4>] nf_hook_slow+0x74/0x110 <4>[67096.760255] [<ffffffff814b5b00>] ? dst_output+0x0/0x20 <4>[67096.760255] [<ffffffff814b66d5>] raw_sendmsg+0x775/0x910 <4>[67096.760255] [<ffffffff8104c5a8>] ? flush_tlb_others_ipi+0x128/0x130 <4>[67096.760255] [<ffffffff8100bc4e>] ? apic_timer_interrupt+0xe/0x20 <4>[67096.760255] [<ffffffff8100bc4e>] ? apic_timer_interrupt+0xe/0x20 <4>[67096.760255] [<ffffffff814c136a>] inet_sendmsg+0x4a/0xb0 <4>[67096.760255] [<ffffffff81444e93>] ? sock_sendmsg+0x13/0x140 <4>[67096.760255] [<ffffffff81444f97>] sock_sendmsg+0x117/0x140 <4>[67096.760255] [<ffffffff8102e299>] ? native_smp_send_reschedule+0x49/0x60 <4>[67096.760255] [<ffffffff81519beb>] ? _spin_unlock_bh+0x1b/0x20 <4>[67096.760255] [<ffffffff8109d930>] ? autoremove_wake_function+0x0/0x40 <4>[67096.760255] [<ffffffff814960f0>] ? do_ip_setsockopt+0x90/0xd80 <4>[67096.760255] [<ffffffff8100bc4e>] ? apic_timer_interrupt+0xe/0x20 <4>[67096.760255] [<ffffffff8100bc4e>] ? apic_timer_interrupt+0xe/0x20 <4>[67096.760255] [<ffffffff814457c9>] sys_sendto+0x139/0x190 <4>[67096.760255] [<ffffffff810efa77>] ? audit_syscall_entry+0x1d7/0x200 <4>[67096.760255] [<ffffffff810ef7c5>] ? __audit_syscall_exit+0x265/0x290 <4>[67096.760255] [<ffffffff81474daf>] compat_sys_socketcall+0x13f/0x210 <4>[67096.760255] [<ffffffff8104dea3>] ia32_sysret+0x0/0x5 I have reused the original title for the RFC patch that Andrey posted and most of the original patch description. Cc: Eric Dumazet <edumazet@google.com> Cc: Andrew Vagin <avagin@parallels.com> Cc: Florian Westphal <fw@strlen.de> Reported-by: Andrew Vagin <avagin@parallels.com> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> Reviewed-by: Eric Dumazet <edumazet@google.com> Acked-by: Andrew Vagin <avagin@parallels.com>
Diffstat (limited to 'net')
-rw-r--r--net/netfilter/nf_conntrack_core.c34
-rw-r--r--net/netfilter/nf_synproxy_core.c5
-rw-r--r--net/netfilter/xt_CT.c7
3 files changed, 32 insertions, 14 deletions
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 4d1fb5d094c3..356bef519fe5 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -448,7 +448,9 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
448 goto out; 448 goto out;
449 449
450 add_timer(&ct->timeout); 450 add_timer(&ct->timeout);
451 nf_conntrack_get(&ct->ct_general); 451 smp_wmb();
452 /* The caller holds a reference to this object */
453 atomic_set(&ct->ct_general.use, 2);
452 __nf_conntrack_hash_insert(ct, hash, repl_hash); 454 __nf_conntrack_hash_insert(ct, hash, repl_hash);
453 NF_CT_STAT_INC(net, insert); 455 NF_CT_STAT_INC(net, insert);
454 spin_unlock_bh(&nf_conntrack_lock); 456 spin_unlock_bh(&nf_conntrack_lock);
@@ -462,6 +464,21 @@ out:
462} 464}
463EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert); 465EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
464 466
467/* deletion from this larval template list happens via nf_ct_put() */
468void nf_conntrack_tmpl_insert(struct net *net, struct nf_conn *tmpl)
469{
470 __set_bit(IPS_TEMPLATE_BIT, &tmpl->status);
471 __set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
472 nf_conntrack_get(&tmpl->ct_general);
473
474 spin_lock_bh(&nf_conntrack_lock);
475 /* Overload tuple linked list to put us in template list. */
476 hlist_nulls_add_head_rcu(&tmpl->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
477 &net->ct.tmpl);
478 spin_unlock_bh(&nf_conntrack_lock);
479}
480EXPORT_SYMBOL_GPL(nf_conntrack_tmpl_insert);
481
465/* Confirm a connection given skb; places it in hash table */ 482/* Confirm a connection given skb; places it in hash table */
466int 483int
467__nf_conntrack_confirm(struct sk_buff *skb) 484__nf_conntrack_confirm(struct sk_buff *skb)
@@ -733,11 +750,10 @@ __nf_conntrack_alloc(struct net *net, u16 zone,
733 nf_ct_zone->id = zone; 750 nf_ct_zone->id = zone;
734 } 751 }
735#endif 752#endif
736 /* 753 /* Because we use RCU lookups, we set ct_general.use to zero before
737 * changes to lookup keys must be done before setting refcnt to 1 754 * this is inserted in any list.
738 */ 755 */
739 smp_wmb(); 756 atomic_set(&ct->ct_general.use, 0);
740 atomic_set(&ct->ct_general.use, 1);
741 return ct; 757 return ct;
742 758
743#ifdef CONFIG_NF_CONNTRACK_ZONES 759#ifdef CONFIG_NF_CONNTRACK_ZONES
@@ -761,6 +777,11 @@ void nf_conntrack_free(struct nf_conn *ct)
761{ 777{
762 struct net *net = nf_ct_net(ct); 778 struct net *net = nf_ct_net(ct);
763 779
780 /* A freed object has refcnt == 0, that's
781 * the golden rule for SLAB_DESTROY_BY_RCU
782 */
783 NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 0);
784
764 nf_ct_ext_destroy(ct); 785 nf_ct_ext_destroy(ct);
765 nf_ct_ext_free(ct); 786 nf_ct_ext_free(ct);
766 kmem_cache_free(net->ct.nf_conntrack_cachep, ct); 787 kmem_cache_free(net->ct.nf_conntrack_cachep, ct);
@@ -856,6 +877,9 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
856 NF_CT_STAT_INC(net, new); 877 NF_CT_STAT_INC(net, new);
857 } 878 }
858 879
880 /* Now it is inserted into the unconfirmed list, bump refcount */
881 nf_conntrack_get(&ct->ct_general);
882
859 /* Overload tuple linked list to put us in unconfirmed list. */ 883 /* Overload tuple linked list to put us in unconfirmed list. */
860 hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, 884 hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
861 &net->ct.unconfirmed); 885 &net->ct.unconfirmed);
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index 9858e3e51a3a..52e20c9a46a5 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -363,9 +363,8 @@ static int __net_init synproxy_net_init(struct net *net)
363 goto err2; 363 goto err2;
364 if (!nfct_synproxy_ext_add(ct)) 364 if (!nfct_synproxy_ext_add(ct))
365 goto err2; 365 goto err2;
366 __set_bit(IPS_TEMPLATE_BIT, &ct->status);
367 __set_bit(IPS_CONFIRMED_BIT, &ct->status);
368 366
367 nf_conntrack_tmpl_insert(net, ct);
369 snet->tmpl = ct; 368 snet->tmpl = ct;
370 369
371 snet->stats = alloc_percpu(struct synproxy_stats); 370 snet->stats = alloc_percpu(struct synproxy_stats);
@@ -390,7 +389,7 @@ static void __net_exit synproxy_net_exit(struct net *net)
390{ 389{
391 struct synproxy_net *snet = synproxy_pernet(net); 390 struct synproxy_net *snet = synproxy_pernet(net);
392 391
393 nf_conntrack_free(snet->tmpl); 392 nf_ct_put(snet->tmpl);
394 synproxy_proc_exit(net); 393 synproxy_proc_exit(net);
395 free_percpu(snet->stats); 394 free_percpu(snet->stats);
396} 395}
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 5929be622c5c..75747aecdebe 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -228,12 +228,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
228 goto err3; 228 goto err3;
229 } 229 }
230 230
231 __set_bit(IPS_TEMPLATE_BIT, &ct->status); 231 nf_conntrack_tmpl_insert(par->net, ct);
232 __set_bit(IPS_CONFIRMED_BIT, &ct->status);
233
234 /* Overload tuple linked list to put us in template list. */
235 hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
236 &par->net->ct.tmpl);
237out: 232out:
238 info->ct = ct; 233 info->ct = ct;
239 return 0; 234 return 0;