diff options
author | Patrick McHardy <kaber@trash.net> | 2005-08-09 23:02:13 -0400 |
---|---|---|
committer | David S. Miller <davem@sunset.davemloft.net> | 2005-08-29 18:38:54 -0400 |
commit | a86888b925299330053d20e0eba03ac4d2648c4b (patch) | |
tree | 27c2d22d98a9eed22749df1a8d32f72e1b5a2468 | |
parent | a55ebcc4c4532107ad9eee1c9bb698ab5f12c00f (diff) |
[NETFILTER]: Fix multiple problems with the conntrack event cache
refcnt underflow: the reference count is decremented when a conntrack
entry is removed from the hash but it is not incremented when entering
new entries.
missing protection of process context against softirq context: all
cache operations need to locally disable softirqs to avoid races.
Additionally the event cache can't be initialized when a packet
enters the conntrack code but needs to be initialized whenever we
cache an event and the stored conntrack entry doesn't match the
current one.
incorrect flushing of the event cache in ip_ct_iterate_cleanup:
without real locking we can't flush the cache for different CPUs
without incurring races. The cache for different CPUs can only be
flushed when no packets are going through the
code. ip_ct_iterate_cleanup doesn't need to drop all references, so
flushing is moved to the cleanup path.
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/linux/netfilter_ipv4/ip_conntrack.h | 29 | ||||
-rw-r--r-- | include/linux/netfilter_ipv4/ip_conntrack_core.h | 14 | ||||
-rw-r--r-- | net/ipv4/netfilter/ip_conntrack_core.c | 105 | ||||
-rw-r--r-- | net/ipv4/netfilter/ip_conntrack_standalone.c | 3 |
4 files changed, 57 insertions, 94 deletions
diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h index ff2c1c6001f9..088742befe49 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack.h +++ b/include/linux/netfilter_ipv4/ip_conntrack.h | |||
@@ -411,6 +411,7 @@ struct ip_conntrack_stat | |||
411 | 411 | ||
412 | #ifdef CONFIG_IP_NF_CONNTRACK_EVENTS | 412 | #ifdef CONFIG_IP_NF_CONNTRACK_EVENTS |
413 | #include <linux/notifier.h> | 413 | #include <linux/notifier.h> |
414 | #include <linux/interrupt.h> | ||
414 | 415 | ||
415 | struct ip_conntrack_ecache { | 416 | struct ip_conntrack_ecache { |
416 | struct ip_conntrack *ct; | 417 | struct ip_conntrack *ct; |
@@ -445,26 +446,24 @@ ip_conntrack_expect_unregister_notifier(struct notifier_block *nb) | |||
445 | return notifier_chain_unregister(&ip_conntrack_expect_chain, nb); | 446 | return notifier_chain_unregister(&ip_conntrack_expect_chain, nb); |
446 | } | 447 | } |
447 | 448 | ||
449 | extern void ip_ct_deliver_cached_events(const struct ip_conntrack *ct); | ||
450 | extern void __ip_ct_event_cache_init(struct ip_conntrack *ct); | ||
451 | |||
448 | static inline void | 452 | static inline void |
449 | ip_conntrack_event_cache(enum ip_conntrack_events event, | 453 | ip_conntrack_event_cache(enum ip_conntrack_events event, |
450 | const struct sk_buff *skb) | 454 | const struct sk_buff *skb) |
451 | { | 455 | { |
452 | struct ip_conntrack_ecache *ecache = | 456 | struct ip_conntrack *ct = (struct ip_conntrack *)skb->nfct; |
453 | &__get_cpu_var(ip_conntrack_ecache); | 457 | struct ip_conntrack_ecache *ecache; |
454 | 458 | ||
455 | if (unlikely((struct ip_conntrack *) skb->nfct != ecache->ct)) { | 459 | local_bh_disable(); |
456 | if (net_ratelimit()) { | 460 | ecache = &__get_cpu_var(ip_conntrack_ecache); |
457 | printk(KERN_ERR "ctevent: skb->ct != ecache->ct !!!\n"); | 461 | if (ct != ecache->ct) |
458 | dump_stack(); | 462 | __ip_ct_event_cache_init(ct); |
459 | } | ||
460 | } | ||
461 | ecache->events |= event; | 463 | ecache->events |= event; |
464 | local_bh_enable(); | ||
462 | } | 465 | } |
463 | 466 | ||
464 | extern void | ||
465 | ip_conntrack_deliver_cached_events_for(const struct ip_conntrack *ct); | ||
466 | extern void ip_conntrack_event_cache_init(const struct sk_buff *skb); | ||
467 | |||
468 | static inline void ip_conntrack_event(enum ip_conntrack_events event, | 467 | static inline void ip_conntrack_event(enum ip_conntrack_events event, |
469 | struct ip_conntrack *ct) | 468 | struct ip_conntrack *ct) |
470 | { | 469 | { |
@@ -483,9 +482,7 @@ static inline void ip_conntrack_event_cache(enum ip_conntrack_events event, | |||
483 | const struct sk_buff *skb) {} | 482 | const struct sk_buff *skb) {} |
484 | static inline void ip_conntrack_event(enum ip_conntrack_events event, | 483 | static inline void ip_conntrack_event(enum ip_conntrack_events event, |
485 | struct ip_conntrack *ct) {} | 484 | struct ip_conntrack *ct) {} |
486 | static inline void ip_conntrack_deliver_cached_events_for( | 485 | static inline void ip_ct_deliver_cached_events(const struct ip_conntrack *ct) {} |
487 | struct ip_conntrack *ct) {} | ||
488 | static inline void ip_conntrack_event_cache_init(const struct sk_buff *skb) {} | ||
489 | static inline void | 486 | static inline void |
490 | ip_conntrack_expect_event(enum ip_conntrack_expect_events event, | 487 | ip_conntrack_expect_event(enum ip_conntrack_expect_events event, |
491 | struct ip_conntrack_expect *exp) {} | 488 | struct ip_conntrack_expect *exp) {} |
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_core.h b/include/linux/netfilter_ipv4/ip_conntrack_core.h index fbf6c3e41647..dc4d2a0575de 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_core.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_core.h | |||
@@ -44,18 +44,14 @@ static inline int ip_conntrack_confirm(struct sk_buff **pskb) | |||
44 | struct ip_conntrack *ct = (struct ip_conntrack *)(*pskb)->nfct; | 44 | struct ip_conntrack *ct = (struct ip_conntrack *)(*pskb)->nfct; |
45 | int ret = NF_ACCEPT; | 45 | int ret = NF_ACCEPT; |
46 | 46 | ||
47 | if (ct && !is_confirmed(ct)) | 47 | if (ct) { |
48 | ret = __ip_conntrack_confirm(pskb); | 48 | if (!is_confirmed(ct)) |
49 | ip_conntrack_deliver_cached_events_for(ct); | 49 | ret = __ip_conntrack_confirm(pskb); |
50 | 50 | ip_ct_deliver_cached_events(ct); | |
51 | } | ||
51 | return ret; | 52 | return ret; |
52 | } | 53 | } |
53 | 54 | ||
54 | #ifdef CONFIG_IP_NF_CONNTRACK_EVENTS | ||
55 | struct ip_conntrack_ecache; | ||
56 | extern void __ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ec); | ||
57 | #endif | ||
58 | |||
59 | extern void __ip_ct_expect_unlink_destroy(struct ip_conntrack_expect *exp); | 55 | extern void __ip_ct_expect_unlink_destroy(struct ip_conntrack_expect *exp); |
60 | 56 | ||
61 | extern struct list_head *ip_conntrack_hash; | 57 | extern struct list_head *ip_conntrack_hash; |
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index d9fddae8d787..5c3f16eae2d8 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c | |||
@@ -85,73 +85,62 @@ struct notifier_block *ip_conntrack_expect_chain; | |||
85 | 85 | ||
86 | DEFINE_PER_CPU(struct ip_conntrack_ecache, ip_conntrack_ecache); | 86 | DEFINE_PER_CPU(struct ip_conntrack_ecache, ip_conntrack_ecache); |
87 | 87 | ||
88 | static inline void __deliver_cached_events(struct ip_conntrack_ecache *ecache) | 88 | /* deliver cached events and clear cache entry - must be called with locally |
89 | * disabled softirqs */ | ||
90 | static inline void | ||
91 | __ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ecache) | ||
89 | { | 92 | { |
93 | DEBUGP("ecache: delivering events for %p\n", ecache->ct); | ||
90 | if (is_confirmed(ecache->ct) && !is_dying(ecache->ct) && ecache->events) | 94 | if (is_confirmed(ecache->ct) && !is_dying(ecache->ct) && ecache->events) |
91 | notifier_call_chain(&ip_conntrack_chain, ecache->events, | 95 | notifier_call_chain(&ip_conntrack_chain, ecache->events, |
92 | ecache->ct); | 96 | ecache->ct); |
93 | ecache->events = 0; | 97 | ecache->events = 0; |
94 | } | 98 | ip_conntrack_put(ecache->ct); |
95 | 99 | ecache->ct = NULL; | |
96 | void __ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ecache) | ||
97 | { | ||
98 | __deliver_cached_events(ecache); | ||
99 | } | 100 | } |
100 | 101 | ||
101 | /* Deliver all cached events for a particular conntrack. This is called | 102 | /* Deliver all cached events for a particular conntrack. This is called |
102 | * by code prior to async packet handling or freeing the skb */ | 103 | * by code prior to async packet handling or freeing the skb */ |
103 | void | 104 | void ip_ct_deliver_cached_events(const struct ip_conntrack *ct) |
104 | ip_conntrack_deliver_cached_events_for(const struct ip_conntrack *ct) | ||
105 | { | 105 | { |
106 | struct ip_conntrack_ecache *ecache = | 106 | struct ip_conntrack_ecache *ecache; |
107 | &__get_cpu_var(ip_conntrack_ecache); | 107 | |
108 | 108 | local_bh_disable(); | |
109 | if (!ct) | 109 | ecache = &__get_cpu_var(ip_conntrack_ecache); |
110 | return; | 110 | if (ecache->ct == ct) |
111 | __ip_ct_deliver_cached_events(ecache); | ||
112 | local_bh_enable(); | ||
113 | } | ||
111 | 114 | ||
112 | if (ecache->ct == ct) { | 115 | void __ip_ct_event_cache_init(struct ip_conntrack *ct) |
113 | DEBUGP("ecache: delivering event for %p\n", ct); | 116 | { |
114 | __deliver_cached_events(ecache); | 117 | struct ip_conntrack_ecache *ecache; |
115 | } else { | ||
116 | if (net_ratelimit()) | ||
117 | printk(KERN_WARNING "ecache: want to deliver for %p, " | ||
118 | "but cache has %p\n", ct, ecache->ct); | ||
119 | } | ||
120 | 118 | ||
121 | /* signalize that events have already been delivered */ | 119 | /* take care of delivering potentially old events */ |
122 | ecache->ct = NULL; | 120 | ecache = &__get_cpu_var(ip_conntrack_ecache); |
121 | BUG_ON(ecache->ct == ct); | ||
122 | if (ecache->ct) | ||
123 | __ip_ct_deliver_cached_events(ecache); | ||
124 | /* initialize for this conntrack/packet */ | ||
125 | ecache->ct = ct; | ||
126 | nf_conntrack_get(&ct->ct_general); | ||
123 | } | 127 | } |
124 | 128 | ||
125 | /* Deliver cached events for old pending events, if current conntrack != old */ | 129 | /* flush the event cache - touches other CPU's data and must not be called while |
126 | void ip_conntrack_event_cache_init(const struct sk_buff *skb) | 130 | * packets are still passing through the code */ |
131 | static void ip_ct_event_cache_flush(void) | ||
127 | { | 132 | { |
128 | struct ip_conntrack *ct = (struct ip_conntrack *) skb->nfct; | 133 | struct ip_conntrack_ecache *ecache; |
129 | struct ip_conntrack_ecache *ecache = | 134 | int cpu; |
130 | &__get_cpu_var(ip_conntrack_ecache); | ||
131 | 135 | ||
132 | /* take care of delivering potentially old events */ | 136 | for_each_cpu(cpu) { |
133 | if (ecache->ct != ct) { | 137 | ecache = &per_cpu(ip_conntrack_ecache, cpu); |
134 | enum ip_conntrack_info ctinfo; | 138 | if (ecache->ct) |
135 | /* we have to check, since at startup the cache is NULL */ | ||
136 | if (likely(ecache->ct)) { | ||
137 | DEBUGP("ecache: entered for different conntrack: " | ||
138 | "ecache->ct=%p, skb->nfct=%p. delivering " | ||
139 | "events\n", ecache->ct, ct); | ||
140 | __deliver_cached_events(ecache); | ||
141 | ip_conntrack_put(ecache->ct); | 139 | ip_conntrack_put(ecache->ct); |
142 | } else { | ||
143 | DEBUGP("ecache: entered for conntrack %p, " | ||
144 | "cache was clean before\n", ct); | ||
145 | } | ||
146 | |||
147 | /* initialize for this conntrack/packet */ | ||
148 | ecache->ct = ip_conntrack_get(skb, &ctinfo); | ||
149 | /* ecache->events cleared by __deliver_cached_devents() */ | ||
150 | } else { | ||
151 | DEBUGP("ecache: re-entered for conntrack %p.\n", ct); | ||
152 | } | 140 | } |
153 | } | 141 | } |
154 | 142 | #else | |
143 | static inline void ip_ct_event_cache_flush(void) {} | ||
155 | #endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */ | 144 | #endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */ |
156 | 145 | ||
157 | DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat); | 146 | DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat); |
@@ -878,8 +867,6 @@ unsigned int ip_conntrack_in(unsigned int hooknum, | |||
878 | 867 | ||
879 | IP_NF_ASSERT((*pskb)->nfct); | 868 | IP_NF_ASSERT((*pskb)->nfct); |
880 | 869 | ||
881 | ip_conntrack_event_cache_init(*pskb); | ||
882 | |||
883 | ret = proto->packet(ct, *pskb, ctinfo); | 870 | ret = proto->packet(ct, *pskb, ctinfo); |
884 | if (ret < 0) { | 871 | if (ret < 0) { |
885 | /* Invalid: inverse of the return code tells | 872 | /* Invalid: inverse of the return code tells |
@@ -1278,23 +1265,6 @@ ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data) | |||
1278 | 1265 | ||
1279 | ip_conntrack_put(ct); | 1266 | ip_conntrack_put(ct); |
1280 | } | 1267 | } |
1281 | |||
1282 | #ifdef CONFIG_IP_NF_CONNTRACK_EVENTS | ||
1283 | { | ||
1284 | /* we need to deliver all cached events in order to drop | ||
1285 | * the reference counts */ | ||
1286 | int cpu; | ||
1287 | for_each_cpu(cpu) { | ||
1288 | struct ip_conntrack_ecache *ecache = | ||
1289 | &per_cpu(ip_conntrack_ecache, cpu); | ||
1290 | if (ecache->ct) { | ||
1291 | __ip_ct_deliver_cached_events(ecache); | ||
1292 | ip_conntrack_put(ecache->ct); | ||
1293 | ecache->ct = NULL; | ||
1294 | } | ||
1295 | } | ||
1296 | } | ||
1297 | #endif | ||
1298 | } | 1268 | } |
1299 | 1269 | ||
1300 | /* Fast function for those who don't want to parse /proc (and I don't | 1270 | /* Fast function for those who don't want to parse /proc (and I don't |
@@ -1381,6 +1351,7 @@ void ip_conntrack_flush() | |||
1381 | delete... */ | 1351 | delete... */ |
1382 | synchronize_net(); | 1352 | synchronize_net(); |
1383 | 1353 | ||
1354 | ip_ct_event_cache_flush(); | ||
1384 | i_see_dead_people: | 1355 | i_see_dead_people: |
1385 | ip_ct_iterate_cleanup(kill_all, NULL); | 1356 | ip_ct_iterate_cleanup(kill_all, NULL); |
1386 | if (atomic_read(&ip_conntrack_count) != 0) { | 1357 | if (atomic_read(&ip_conntrack_count) != 0) { |
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index ca97c3ac2f2a..ee5895afd0c3 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c | |||
@@ -401,7 +401,6 @@ static unsigned int ip_confirm(unsigned int hooknum, | |||
401 | const struct net_device *out, | 401 | const struct net_device *out, |
402 | int (*okfn)(struct sk_buff *)) | 402 | int (*okfn)(struct sk_buff *)) |
403 | { | 403 | { |
404 | ip_conntrack_event_cache_init(*pskb); | ||
405 | /* We've seen it coming out the other side: confirm it */ | 404 | /* We've seen it coming out the other side: confirm it */ |
406 | return ip_conntrack_confirm(pskb); | 405 | return ip_conntrack_confirm(pskb); |
407 | } | 406 | } |
@@ -419,7 +418,6 @@ static unsigned int ip_conntrack_help(unsigned int hooknum, | |||
419 | ct = ip_conntrack_get(*pskb, &ctinfo); | 418 | ct = ip_conntrack_get(*pskb, &ctinfo); |
420 | if (ct && ct->helper) { | 419 | if (ct && ct->helper) { |
421 | unsigned int ret; | 420 | unsigned int ret; |
422 | ip_conntrack_event_cache_init(*pskb); | ||
423 | ret = ct->helper->help(pskb, ct, ctinfo); | 421 | ret = ct->helper->help(pskb, ct, ctinfo); |
424 | if (ret != NF_ACCEPT) | 422 | if (ret != NF_ACCEPT) |
425 | return ret; | 423 | return ret; |
@@ -978,6 +976,7 @@ EXPORT_SYMBOL_GPL(ip_conntrack_chain); | |||
978 | EXPORT_SYMBOL_GPL(ip_conntrack_expect_chain); | 976 | EXPORT_SYMBOL_GPL(ip_conntrack_expect_chain); |
979 | EXPORT_SYMBOL_GPL(ip_conntrack_register_notifier); | 977 | EXPORT_SYMBOL_GPL(ip_conntrack_register_notifier); |
980 | EXPORT_SYMBOL_GPL(ip_conntrack_unregister_notifier); | 978 | EXPORT_SYMBOL_GPL(ip_conntrack_unregister_notifier); |
979 | EXPORT_SYMBOL_GPL(__ip_ct_event_cache_init); | ||
981 | EXPORT_PER_CPU_SYMBOL_GPL(ip_conntrack_ecache); | 980 | EXPORT_PER_CPU_SYMBOL_GPL(ip_conntrack_ecache); |
982 | #endif | 981 | #endif |
983 | EXPORT_SYMBOL(ip_conntrack_protocol_register); | 982 | EXPORT_SYMBOL(ip_conntrack_protocol_register); |