diff options
author | Liping Zhang <zlpnobody@gmail.com> | 2017-04-17 09:18:57 -0400 |
---|---|---|
committer | Pablo Neira Ayuso <pablo@netfilter.org> | 2017-04-24 14:06:28 -0400 |
commit | 53b56da83d7899de375a9de153fd7f5397de85e6 (patch) | |
tree | 28962145c0e5a76f12c59d5eeb25fb7d4f939626 | |
parent | 88be4c09d9008f9ff337cbf48c5d0f06c8f872e7 (diff) |
netfilter: ctnetlink: make it safer when updating ct->status
After converting to use rcu for conntrack hash, one CPU may update
the ct->status via ctnetlink, while another CPU may process the
packets and update the ct->status.
So the non-atomic operation "ct->status |= status;" via ctnetlink
becomes unsafe, and this may clear the IPS_DYING_BIT bit set by
another CPU unexpectedly. For example:
CPU0 CPU1
ctnetlink_change_status __nf_conntrack_find_get
old = ct->status nf_ct_gc_expired
- nf_ct_kill
- test_and_set_bit(IPS_DYING_BIT
new = old | status; -
ct->status = new; <-- oops, _DYING_ is cleared!
Now using a series of atomic bit operation to solve the above issue.
Also note, user shouldn't set IPS_TEMPLATE, IPS_SEQ_ADJUST directly,
so make these two bits be unchangable too.
If we set the IPS_TEMPLATE_BIT, ct will be freed by nf_ct_tmpl_free,
but actually it is alloced by nf_conntrack_alloc.
If we set the IPS_SEQ_ADJUST_BIT, this may cause the NULL pointer
deference, as the nfct_seqadj(ct) maybe NULL.
Last, add some comments to describe the logic change due to the
commit a963d710f367 ("netfilter: ctnetlink: Fix regression in CTA_STATUS
processing"), which makes me feel a little confusing.
Fixes: 76507f69c44e ("[NETFILTER]: nf_conntrack: use RCU for conntrack hash")
Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
-rw-r--r-- | include/uapi/linux/netfilter/nf_conntrack_common.h | 13 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_netlink.c | 33 |
2 files changed, 33 insertions, 13 deletions
diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index 6a8e33dd4ecb..38fc383139f0 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h | |||
@@ -82,10 +82,6 @@ enum ip_conntrack_status { | |||
82 | IPS_DYING_BIT = 9, | 82 | IPS_DYING_BIT = 9, |
83 | IPS_DYING = (1 << IPS_DYING_BIT), | 83 | IPS_DYING = (1 << IPS_DYING_BIT), |
84 | 84 | ||
85 | /* Bits that cannot be altered from userland. */ | ||
86 | IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK | | ||
87 | IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING), | ||
88 | |||
89 | /* Connection has fixed timeout. */ | 85 | /* Connection has fixed timeout. */ |
90 | IPS_FIXED_TIMEOUT_BIT = 10, | 86 | IPS_FIXED_TIMEOUT_BIT = 10, |
91 | IPS_FIXED_TIMEOUT = (1 << IPS_FIXED_TIMEOUT_BIT), | 87 | IPS_FIXED_TIMEOUT = (1 << IPS_FIXED_TIMEOUT_BIT), |
@@ -101,6 +97,15 @@ enum ip_conntrack_status { | |||
101 | /* Conntrack got a helper explicitly attached via CT target. */ | 97 | /* Conntrack got a helper explicitly attached via CT target. */ |
102 | IPS_HELPER_BIT = 13, | 98 | IPS_HELPER_BIT = 13, |
103 | IPS_HELPER = (1 << IPS_HELPER_BIT), | 99 | IPS_HELPER = (1 << IPS_HELPER_BIT), |
100 | |||
101 | /* Be careful here, modifying these bits can make things messy, | ||
102 | * so don't let users modify them directly. | ||
103 | */ | ||
104 | IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK | | ||
105 | IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING | | ||
106 | IPS_SEQ_ADJUST | IPS_TEMPLATE), | ||
107 | |||
108 | __IPS_MAX_BIT = 14, | ||
104 | }; | 109 | }; |
105 | 110 | ||
106 | /* Connection tracking event types */ | 111 | /* Connection tracking event types */ |
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index e5f97777b1f4..86deed6a8db4 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c | |||
@@ -1419,6 +1419,24 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct, | |||
1419 | } | 1419 | } |
1420 | #endif | 1420 | #endif |
1421 | 1421 | ||
1422 | static void | ||
1423 | __ctnetlink_change_status(struct nf_conn *ct, unsigned long on, | ||
1424 | unsigned long off) | ||
1425 | { | ||
1426 | unsigned int bit; | ||
1427 | |||
1428 | /* Ignore these unchangable bits */ | ||
1429 | on &= ~IPS_UNCHANGEABLE_MASK; | ||
1430 | off &= ~IPS_UNCHANGEABLE_MASK; | ||
1431 | |||
1432 | for (bit = 0; bit < __IPS_MAX_BIT; bit++) { | ||
1433 | if (on & (1 << bit)) | ||
1434 | set_bit(bit, &ct->status); | ||
1435 | else if (off & (1 << bit)) | ||
1436 | clear_bit(bit, &ct->status); | ||
1437 | } | ||
1438 | } | ||
1439 | |||
1422 | static int | 1440 | static int |
1423 | ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[]) | 1441 | ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[]) |
1424 | { | 1442 | { |
@@ -1438,10 +1456,7 @@ ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[]) | |||
1438 | /* ASSURED bit can only be set */ | 1456 | /* ASSURED bit can only be set */ |
1439 | return -EBUSY; | 1457 | return -EBUSY; |
1440 | 1458 | ||
1441 | /* Be careful here, modifying NAT bits can screw up things, | 1459 | __ctnetlink_change_status(ct, status, 0); |
1442 | * so don't let users modify them directly if they don't pass | ||
1443 | * nf_nat_range. */ | ||
1444 | ct->status |= status & ~(IPS_NAT_DONE_MASK | IPS_NAT_MASK); | ||
1445 | return 0; | 1460 | return 0; |
1446 | } | 1461 | } |
1447 | 1462 | ||
@@ -1628,7 +1643,7 @@ ctnetlink_change_seq_adj(struct nf_conn *ct, | |||
1628 | if (ret < 0) | 1643 | if (ret < 0) |
1629 | return ret; | 1644 | return ret; |
1630 | 1645 | ||
1631 | ct->status |= IPS_SEQ_ADJUST; | 1646 | set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); |
1632 | } | 1647 | } |
1633 | 1648 | ||
1634 | if (cda[CTA_SEQ_ADJ_REPLY]) { | 1649 | if (cda[CTA_SEQ_ADJ_REPLY]) { |
@@ -1637,7 +1652,7 @@ ctnetlink_change_seq_adj(struct nf_conn *ct, | |||
1637 | if (ret < 0) | 1652 | if (ret < 0) |
1638 | return ret; | 1653 | return ret; |
1639 | 1654 | ||
1640 | ct->status |= IPS_SEQ_ADJUST; | 1655 | set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); |
1641 | } | 1656 | } |
1642 | 1657 | ||
1643 | return 0; | 1658 | return 0; |
@@ -2289,10 +2304,10 @@ ctnetlink_update_status(struct nf_conn *ct, const struct nlattr * const cda[]) | |||
2289 | /* This check is less strict than ctnetlink_change_status() | 2304 | /* This check is less strict than ctnetlink_change_status() |
2290 | * because callers often flip IPS_EXPECTED bits when sending | 2305 | * because callers often flip IPS_EXPECTED bits when sending |
2291 | * an NFQA_CT attribute to the kernel. So ignore the | 2306 | * an NFQA_CT attribute to the kernel. So ignore the |
2292 | * unchangeable bits but do not error out. | 2307 | * unchangeable bits but do not error out. Also user programs |
2308 | * are allowed to clear the bits that they are allowed to change. | ||
2293 | */ | 2309 | */ |
2294 | ct->status = (status & ~IPS_UNCHANGEABLE_MASK) | | 2310 | __ctnetlink_change_status(ct, status, ~status); |
2295 | (ct->status & IPS_UNCHANGEABLE_MASK); | ||
2296 | return 0; | 2311 | return 0; |
2297 | } | 2312 | } |
2298 | 2313 | ||