aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPablo Neira Ayuso <pablo@netfilter.org>2014-11-24 18:14:47 -0500
committerPablo Neira Ayuso <pablo@netfilter.org>2015-01-06 16:27:45 -0500
commit8ca3f5e974f2b4b7f711589f4abff920db36637a (patch)
tree96c599dd113c822d1a666bc2a049c3778acd1aeb
parent7b5bca4676c7cd78b0f1ef8a132ef3ba9863c9ef (diff)
netfilter: conntrack: fix race between confirmation and flush
Commit 5195c14c8b27c ("netfilter: conntrack: fix race in __nf_conntrack_confirm against get_next_corpse") aimed to resolve the race condition between the confirmation (packet path) and the flush command (from control plane). However, it introduced a crash when several packets race to add a new conntrack, which seems easier to reproduce when nf_queue is in place. Fix this race, in __nf_conntrack_confirm(), by removing the CT from unconfirmed list before checking the DYING bit. In case race occured, re-add the CT to the dying list This patch also changes the verdict from NF_ACCEPT to NF_DROP when we lose race. Basically, the confirmation happens for the first packet that we see in a flow. If you just invoked conntrack -F once (which should be the common case), then this is likely to be the first packet of the flow (unless you already called flush anytime soon in the past). This should be hard to trigger, but better drop this packet, otherwise we leave things in inconsistent state since the destination will likely reply to this packet, but it will find no conntrack, unless the origin retransmits. The change of the verdict has been discussed in: https://www.marc.info/?l=linux-netdev&m=141588039530056&w=2 Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
-rw-r--r--net/netfilter/nf_conntrack_core.c20
1 files changed, 9 insertions, 11 deletions
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index a11674806707..46d1b26a468e 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -611,16 +611,15 @@ __nf_conntrack_confirm(struct sk_buff *skb)
611 */ 611 */
612 NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); 612 NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
613 pr_debug("Confirming conntrack %p\n", ct); 613 pr_debug("Confirming conntrack %p\n", ct);
614 /* We have to check the DYING flag inside the lock to prevent 614 /* We have to check the DYING flag after unlink to prevent
615 a race against nf_ct_get_next_corpse() possibly called from 615 * a race against nf_ct_get_next_corpse() possibly called from
616 user context, else we insert an already 'dead' hash, blocking 616 * user context, else we insert an already 'dead' hash, blocking
617 further use of that particular connection -JM */ 617 * further use of that particular connection -JM.
618 */
619 nf_ct_del_from_dying_or_unconfirmed_list(ct);
618 620
619 if (unlikely(nf_ct_is_dying(ct))) { 621 if (unlikely(nf_ct_is_dying(ct)))
620 nf_conntrack_double_unlock(hash, reply_hash); 622 goto out;
621 local_bh_enable();
622 return NF_ACCEPT;
623 }
624 623
625 /* See if there's one in the list already, including reverse: 624 /* See if there's one in the list already, including reverse:
626 NAT could have grabbed it without realizing, since we're 625 NAT could have grabbed it without realizing, since we're
@@ -636,8 +635,6 @@ __nf_conntrack_confirm(struct sk_buff *skb)
636 zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) 635 zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
637 goto out; 636 goto out;
638 637
639 nf_ct_del_from_dying_or_unconfirmed_list(ct);
640
641 /* Timer relative to confirmation time, not original 638 /* Timer relative to confirmation time, not original
642 setting time, otherwise we'd get timer wrap in 639 setting time, otherwise we'd get timer wrap in
643 weird delay cases. */ 640 weird delay cases. */
@@ -673,6 +670,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
673 return NF_ACCEPT; 670 return NF_ACCEPT;
674 671
675out: 672out:
673 nf_ct_add_to_dying_list(ct);
676 nf_conntrack_double_unlock(hash, reply_hash); 674 nf_conntrack_double_unlock(hash, reply_hash);
677 NF_CT_STAT_INC(net, insert_failed); 675 NF_CT_STAT_INC(net, insert_failed);
678 local_bh_enable(); 676 local_bh_enable();