aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLiping Zhang <zlpnobody@gmail.com>2017-04-17 09:18:57 -0400
committerPablo Neira Ayuso <pablo@netfilter.org>2017-04-24 14:06:28 -0400
commit53b56da83d7899de375a9de153fd7f5397de85e6 (patch)
tree28962145c0e5a76f12c59d5eeb25fb7d4f939626
parent88be4c09d9008f9ff337cbf48c5d0f06c8f872e7 (diff)
netfilter: ctnetlink: make it safer when updating ct->status
After converting to use rcu for conntrack hash, one CPU may update the ct->status via ctnetlink, while another CPU may process the packets and update the ct->status. So the non-atomic operation "ct->status |= status;" via ctnetlink becomes unsafe, and this may clear the IPS_DYING_BIT bit set by another CPU unexpectedly. For example: CPU0 CPU1 ctnetlink_change_status __nf_conntrack_find_get old = ct->status nf_ct_gc_expired - nf_ct_kill - test_and_set_bit(IPS_DYING_BIT new = old | status; - ct->status = new; <-- oops, _DYING_ is cleared! Now using a series of atomic bit operation to solve the above issue. Also note, user shouldn't set IPS_TEMPLATE, IPS_SEQ_ADJUST directly, so make these two bits be unchangable too. If we set the IPS_TEMPLATE_BIT, ct will be freed by nf_ct_tmpl_free, but actually it is alloced by nf_conntrack_alloc. If we set the IPS_SEQ_ADJUST_BIT, this may cause the NULL pointer deference, as the nfct_seqadj(ct) maybe NULL. Last, add some comments to describe the logic change due to the commit a963d710f367 ("netfilter: ctnetlink: Fix regression in CTA_STATUS processing"), which makes me feel a little confusing. Fixes: 76507f69c44e ("[NETFILTER]: nf_conntrack: use RCU for conntrack hash") Signed-off-by: Liping Zhang <zlpnobody@gmail.com> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
-rw-r--r--include/uapi/linux/netfilter/nf_conntrack_common.h13
-rw-r--r--net/netfilter/nf_conntrack_netlink.c33
2 files changed, 33 insertions, 13 deletions
diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h
index 6a8e33dd4ecb..38fc383139f0 100644
--- a/include/uapi/linux/netfilter/nf_conntrack_common.h
+++ b/include/uapi/linux/netfilter/nf_conntrack_common.h
@@ -82,10 +82,6 @@ enum ip_conntrack_status {
82 IPS_DYING_BIT = 9, 82 IPS_DYING_BIT = 9,
83 IPS_DYING = (1 << IPS_DYING_BIT), 83 IPS_DYING = (1 << IPS_DYING_BIT),
84 84
85 /* Bits that cannot be altered from userland. */
86 IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK |
87 IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING),
88
89 /* Connection has fixed timeout. */ 85 /* Connection has fixed timeout. */
90 IPS_FIXED_TIMEOUT_BIT = 10, 86 IPS_FIXED_TIMEOUT_BIT = 10,
91 IPS_FIXED_TIMEOUT = (1 << IPS_FIXED_TIMEOUT_BIT), 87 IPS_FIXED_TIMEOUT = (1 << IPS_FIXED_TIMEOUT_BIT),
@@ -101,6 +97,15 @@ enum ip_conntrack_status {
101 /* Conntrack got a helper explicitly attached via CT target. */ 97 /* Conntrack got a helper explicitly attached via CT target. */
102 IPS_HELPER_BIT = 13, 98 IPS_HELPER_BIT = 13,
103 IPS_HELPER = (1 << IPS_HELPER_BIT), 99 IPS_HELPER = (1 << IPS_HELPER_BIT),
100
101 /* Be careful here, modifying these bits can make things messy,
102 * so don't let users modify them directly.
103 */
104 IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK |
105 IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING |
106 IPS_SEQ_ADJUST | IPS_TEMPLATE),
107
108 __IPS_MAX_BIT = 14,
104}; 109};
105 110
106/* Connection tracking event types */ 111/* Connection tracking event types */
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index e5f97777b1f4..86deed6a8db4 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1419,6 +1419,24 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct,
1419} 1419}
1420#endif 1420#endif
1421 1421
1422static void
1423__ctnetlink_change_status(struct nf_conn *ct, unsigned long on,
1424 unsigned long off)
1425{
1426 unsigned int bit;
1427
1428 /* Ignore these unchangable bits */
1429 on &= ~IPS_UNCHANGEABLE_MASK;
1430 off &= ~IPS_UNCHANGEABLE_MASK;
1431
1432 for (bit = 0; bit < __IPS_MAX_BIT; bit++) {
1433 if (on & (1 << bit))
1434 set_bit(bit, &ct->status);
1435 else if (off & (1 << bit))
1436 clear_bit(bit, &ct->status);
1437 }
1438}
1439
1422static int 1440static int
1423ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[]) 1441ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[])
1424{ 1442{
@@ -1438,10 +1456,7 @@ ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[])
1438 /* ASSURED bit can only be set */ 1456 /* ASSURED bit can only be set */
1439 return -EBUSY; 1457 return -EBUSY;
1440 1458
1441 /* Be careful here, modifying NAT bits can screw up things, 1459 __ctnetlink_change_status(ct, status, 0);
1442 * so don't let users modify them directly if they don't pass
1443 * nf_nat_range. */
1444 ct->status |= status & ~(IPS_NAT_DONE_MASK | IPS_NAT_MASK);
1445 return 0; 1460 return 0;
1446} 1461}
1447 1462
@@ -1628,7 +1643,7 @@ ctnetlink_change_seq_adj(struct nf_conn *ct,
1628 if (ret < 0) 1643 if (ret < 0)
1629 return ret; 1644 return ret;
1630 1645
1631 ct->status |= IPS_SEQ_ADJUST; 1646 set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
1632 } 1647 }
1633 1648
1634 if (cda[CTA_SEQ_ADJ_REPLY]) { 1649 if (cda[CTA_SEQ_ADJ_REPLY]) {
@@ -1637,7 +1652,7 @@ ctnetlink_change_seq_adj(struct nf_conn *ct,
1637 if (ret < 0) 1652 if (ret < 0)
1638 return ret; 1653 return ret;
1639 1654
1640 ct->status |= IPS_SEQ_ADJUST; 1655 set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
1641 } 1656 }
1642 1657
1643 return 0; 1658 return 0;
@@ -2289,10 +2304,10 @@ ctnetlink_update_status(struct nf_conn *ct, const struct nlattr * const cda[])
2289 /* This check is less strict than ctnetlink_change_status() 2304 /* This check is less strict than ctnetlink_change_status()
2290 * because callers often flip IPS_EXPECTED bits when sending 2305 * because callers often flip IPS_EXPECTED bits when sending
2291 * an NFQA_CT attribute to the kernel. So ignore the 2306 * an NFQA_CT attribute to the kernel. So ignore the
2292 * unchangeable bits but do not error out. 2307 * unchangeable bits but do not error out. Also user programs
2308 * are allowed to clear the bits that they are allowed to change.
2293 */ 2309 */
2294 ct->status = (status & ~IPS_UNCHANGEABLE_MASK) | 2310 __ctnetlink_change_status(ct, status, ~status);
2295 (ct->status & IPS_UNCHANGEABLE_MASK);
2296 return 0; 2311 return 0;
2297} 2312}
2298 2313