author    Patrick McHardy <kaber@trash.net>          2005-08-09 23:02:13 -0400
committer David S. Miller <davem@sunset.davemloft.net>  2005-08-29 18:38:54 -0400
commit    a86888b925299330053d20e0eba03ac4d2648c4b (patch)
tree      27c2d22d98a9eed22749df1a8d32f72e1b5a2468
parent    a55ebcc4c4532107ad9eee1c9bb698ab5f12c00f (diff)
[NETFILTER]: Fix multiple problems with the conntrack event cache
refcnt underflow: the reference count is decremented when a conntrack
entry is removed from the hash, but it is not incremented when entering
new entries.

missing protection of process context against softirq context: all
cache operations need to locally disable softirqs to avoid races.
Additionally, the event cache can't be initialized when a packet enters
the conntrack code, but needs to be initialized whenever we cache an
event and the stored conntrack entry doesn't match the current one.

incorrect flushing of the event cache in ip_ct_iterate_cleanup: without
real locking we can't flush the cache for different CPUs without
incurring races. The cache for different CPUs can only be flushed when
no packets are going through the code. ip_ct_iterate_cleanup doesn't
need to drop all references, so flushing is moved to the cleanup path.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
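In short: every per-CPU cache access is now bracketed by
local_bh_disable()/local_bh_enable(), and the cache takes its own
reference on the conntrack entry it holds. A distilled sketch of the
resulting fast path (condensed from the diff below, with explanatory
comments added here for illustration; not literal kernel source):

	static inline void
	ip_conntrack_event_cache(enum ip_conntrack_events event,
				 const struct sk_buff *skb)
	{
		struct ip_conntrack *ct = (struct ip_conntrack *)skb->nfct;
		struct ip_conntrack_ecache *ecache;

		local_bh_disable();	/* no softirq vs. process context races */
		ecache = &__get_cpu_var(ip_conntrack_ecache);
		if (ct != ecache->ct)
			/* delivers pending events for the old entry (dropping
			 * its reference) and grabs a reference on ct - this
			 * is what fixes the refcnt underflow */
			__ip_ct_event_cache_init(ct);
		ecache->events |= event;
		local_bh_enable();
	}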
-rw-r--r--  include/linux/netfilter_ipv4/ip_conntrack.h       |  29
-rw-r--r--  include/linux/netfilter_ipv4/ip_conntrack_core.h  |  14
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_core.c            | 105
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_standalone.c      |   3
4 files changed, 57 insertions, 94 deletions
diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h
index ff2c1c6001f9..088742befe49 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack.h
@@ -411,6 +411,7 @@ struct ip_conntrack_stat
 
 #ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
 #include <linux/notifier.h>
+#include <linux/interrupt.h>
 
 struct ip_conntrack_ecache {
 	struct ip_conntrack *ct;
@@ -445,26 +446,24 @@ ip_conntrack_expect_unregister_notifier(struct notifier_block *nb)
 	return notifier_chain_unregister(&ip_conntrack_expect_chain, nb);
 }
 
+extern void ip_ct_deliver_cached_events(const struct ip_conntrack *ct);
+extern void __ip_ct_event_cache_init(struct ip_conntrack *ct);
+
 static inline void
 ip_conntrack_event_cache(enum ip_conntrack_events event,
 			 const struct sk_buff *skb)
 {
-	struct ip_conntrack_ecache *ecache =
-			&__get_cpu_var(ip_conntrack_ecache);
-
-	if (unlikely((struct ip_conntrack *) skb->nfct != ecache->ct)) {
-		if (net_ratelimit()) {
-			printk(KERN_ERR "ctevent: skb->ct != ecache->ct !!!\n");
-			dump_stack();
-		}
-	}
+	struct ip_conntrack *ct = (struct ip_conntrack *)skb->nfct;
+	struct ip_conntrack_ecache *ecache;
+
+	local_bh_disable();
+	ecache = &__get_cpu_var(ip_conntrack_ecache);
+	if (ct != ecache->ct)
+		__ip_ct_event_cache_init(ct);
 	ecache->events |= event;
+	local_bh_enable();
 }
 
-extern void
-ip_conntrack_deliver_cached_events_for(const struct ip_conntrack *ct);
-extern void ip_conntrack_event_cache_init(const struct sk_buff *skb);
-
 static inline void ip_conntrack_event(enum ip_conntrack_events event,
 				      struct ip_conntrack *ct)
 {
@@ -483,9 +482,7 @@ static inline void ip_conntrack_event_cache(enum ip_conntrack_events event,
 					    const struct sk_buff *skb) {}
 static inline void ip_conntrack_event(enum ip_conntrack_events event,
 				      struct ip_conntrack *ct) {}
-static inline void ip_conntrack_deliver_cached_events_for(
-						struct ip_conntrack *ct) {}
-static inline void ip_conntrack_event_cache_init(const struct sk_buff *skb) {}
+static inline void ip_ct_deliver_cached_events(const struct ip_conntrack *ct) {}
 static inline void
 ip_conntrack_expect_event(enum ip_conntrack_expect_events event,
 			  struct ip_conntrack_expect *exp) {}
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_core.h b/include/linux/netfilter_ipv4/ip_conntrack_core.h
index fbf6c3e41647..dc4d2a0575de 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack_core.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack_core.h
@@ -44,18 +44,14 @@ static inline int ip_conntrack_confirm(struct sk_buff **pskb)
 	struct ip_conntrack *ct = (struct ip_conntrack *)(*pskb)->nfct;
 	int ret = NF_ACCEPT;
 
-	if (ct && !is_confirmed(ct))
-		ret = __ip_conntrack_confirm(pskb);
-	ip_conntrack_deliver_cached_events_for(ct);
-
+	if (ct) {
+		if (!is_confirmed(ct))
+			ret = __ip_conntrack_confirm(pskb);
+		ip_ct_deliver_cached_events(ct);
+	}
 	return ret;
 }
 
-#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
-struct ip_conntrack_ecache;
-extern void __ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ec);
-#endif
-
 extern void __ip_ct_expect_unlink_destroy(struct ip_conntrack_expect *exp);
 
 extern struct list_head *ip_conntrack_hash;
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index d9fddae8d787..5c3f16eae2d8 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -85,73 +85,62 @@ struct notifier_block *ip_conntrack_expect_chain;
 
 DEFINE_PER_CPU(struct ip_conntrack_ecache, ip_conntrack_ecache);
 
-static inline void __deliver_cached_events(struct ip_conntrack_ecache *ecache)
+/* deliver cached events and clear cache entry - must be called with locally
+ * disabled softirqs */
+static inline void
+__ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ecache)
 {
+	DEBUGP("ecache: delivering events for %p\n", ecache->ct);
 	if (is_confirmed(ecache->ct) && !is_dying(ecache->ct) && ecache->events)
 		notifier_call_chain(&ip_conntrack_chain, ecache->events,
 				    ecache->ct);
 	ecache->events = 0;
-}
-
-void __ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ecache)
-{
-	__deliver_cached_events(ecache);
+	ip_conntrack_put(ecache->ct);
+	ecache->ct = NULL;
 }
 
 /* Deliver all cached events for a particular conntrack. This is called
  * by code prior to async packet handling or freeing the skb */
-void
-ip_conntrack_deliver_cached_events_for(const struct ip_conntrack *ct)
+void ip_ct_deliver_cached_events(const struct ip_conntrack *ct)
 {
-	struct ip_conntrack_ecache *ecache =
-			&__get_cpu_var(ip_conntrack_ecache);
-
-	if (!ct)
-		return;
-
-	if (ecache->ct == ct) {
-		DEBUGP("ecache: delivering event for %p\n", ct);
-		__deliver_cached_events(ecache);
-	} else {
-		if (net_ratelimit())
-			printk(KERN_WARNING "ecache: want to deliver for %p, "
-			       "but cache has %p\n", ct, ecache->ct);
-	}
-
-	/* signalize that events have already been delivered */
-	ecache->ct = NULL;
+	struct ip_conntrack_ecache *ecache;
+
+	local_bh_disable();
+	ecache = &__get_cpu_var(ip_conntrack_ecache);
+	if (ecache->ct == ct)
+		__ip_ct_deliver_cached_events(ecache);
+	local_bh_enable();
+}
+
+void __ip_ct_event_cache_init(struct ip_conntrack *ct)
+{
+	struct ip_conntrack_ecache *ecache;
+
+	/* take care of delivering potentially old events */
+	ecache = &__get_cpu_var(ip_conntrack_ecache);
+	BUG_ON(ecache->ct == ct);
+	if (ecache->ct)
+		__ip_ct_deliver_cached_events(ecache);
+	/* initialize for this conntrack/packet */
+	ecache->ct = ct;
+	nf_conntrack_get(&ct->ct_general);
 }
 
-/* Deliver cached events for old pending events, if current conntrack != old */
-void ip_conntrack_event_cache_init(const struct sk_buff *skb)
+/* flush the event cache - touches other CPU's data and must not be called while
+ * packets are still passing through the code */
+static void ip_ct_event_cache_flush(void)
 {
-	struct ip_conntrack *ct = (struct ip_conntrack *) skb->nfct;
-	struct ip_conntrack_ecache *ecache =
-			&__get_cpu_var(ip_conntrack_ecache);
+	struct ip_conntrack_ecache *ecache;
+	int cpu;
 
-	/* take care of delivering potentially old events */
-	if (ecache->ct != ct) {
-		enum ip_conntrack_info ctinfo;
-		/* we have to check, since at startup the cache is NULL */
-		if (likely(ecache->ct)) {
-			DEBUGP("ecache: entered for different conntrack: "
-			       "ecache->ct=%p, skb->nfct=%p. delivering "
-			       "events\n", ecache->ct, ct);
-			__deliver_cached_events(ecache);
+	for_each_cpu(cpu) {
+		ecache = &per_cpu(ip_conntrack_ecache, cpu);
+		if (ecache->ct)
 			ip_conntrack_put(ecache->ct);
-		} else {
-			DEBUGP("ecache: entered for conntrack %p, "
-			       "cache was clean before\n", ct);
-		}
-
-		/* initialize for this conntrack/packet */
-		ecache->ct = ip_conntrack_get(skb, &ctinfo);
-		/* ecache->events cleared by __deliver_cached_devents() */
-	} else {
-		DEBUGP("ecache: re-entered for conntrack %p.\n", ct);
-	}
+	}
 }
-
+#else
+static inline void ip_ct_event_cache_flush(void) {}
 #endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */
 
 DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
@@ -878,8 +867,6 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
 
 	IP_NF_ASSERT((*pskb)->nfct);
 
-	ip_conntrack_event_cache_init(*pskb);
-
 	ret = proto->packet(ct, *pskb, ctinfo);
 	if (ret < 0) {
 		/* Invalid: inverse of the return code tells
@@ -1278,23 +1265,6 @@ ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data)
 
 		ip_conntrack_put(ct);
 	}
-
-#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
-	{
-		/* we need to deliver all cached events in order to drop
-		 * the reference counts */
-		int cpu;
-		for_each_cpu(cpu) {
-			struct ip_conntrack_ecache *ecache =
-				&per_cpu(ip_conntrack_ecache, cpu);
-			if (ecache->ct) {
-				__ip_ct_deliver_cached_events(ecache);
-				ip_conntrack_put(ecache->ct);
-				ecache->ct = NULL;
-			}
-		}
-	}
-#endif
 }
 
 /* Fast function for those who don't want to parse /proc (and I don't
@@ -1381,6 +1351,7 @@ void ip_conntrack_flush()
 	   delete... */
 	synchronize_net();
 
+	ip_ct_event_cache_flush();
  i_see_dead_people:
 	ip_ct_iterate_cleanup(kill_all, NULL);
 	if (atomic_read(&ip_conntrack_count) != 0) {
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index ca97c3ac2f2a..ee5895afd0c3 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -401,7 +401,6 @@ static unsigned int ip_confirm(unsigned int hooknum,
 				const struct net_device *out,
 				int (*okfn)(struct sk_buff *))
 {
-	ip_conntrack_event_cache_init(*pskb);
 	/* We've seen it coming out the other side: confirm it */
 	return ip_conntrack_confirm(pskb);
 }
@@ -419,7 +418,6 @@ static unsigned int ip_conntrack_help(unsigned int hooknum,
 	ct = ip_conntrack_get(*pskb, &ctinfo);
 	if (ct && ct->helper) {
 		unsigned int ret;
-		ip_conntrack_event_cache_init(*pskb);
 		ret = ct->helper->help(pskb, ct, ctinfo);
 		if (ret != NF_ACCEPT)
 			return ret;
@@ -978,6 +976,7 @@ EXPORT_SYMBOL_GPL(ip_conntrack_chain);
 EXPORT_SYMBOL_GPL(ip_conntrack_expect_chain);
 EXPORT_SYMBOL_GPL(ip_conntrack_register_notifier);
 EXPORT_SYMBOL_GPL(ip_conntrack_unregister_notifier);
+EXPORT_SYMBOL_GPL(__ip_ct_event_cache_init);
 EXPORT_PER_CPU_SYMBOL_GPL(ip_conntrack_ecache);
 #endif
 EXPORT_SYMBOL(ip_conntrack_protocol_register);
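
For context, events cached above are ultimately delivered through
ip_conntrack_chain via notifier_call_chain(), so a module consumes them
with an ordinary notifier block. A hypothetical listener sketch - the
names my_ct_event/my_ct_nb are illustrative and not part of this patch,
and it assumes the IPCT_NEW bit from enum ip_conntrack_events:

	#include <linux/notifier.h>
	#include <linux/netfilter_ipv4/ip_conntrack.h>

	/* called with the OR'ed event mask and the conntrack it applies to */
	static int my_ct_event(struct notifier_block *nb, unsigned long events,
			       void *ptr)
	{
		struct ip_conntrack *ct = ptr;

		if (events & IPCT_NEW)
			printk(KERN_DEBUG "new connection: %p\n", ct);
		return NOTIFY_DONE;
	}

	static struct notifier_block my_ct_nb = {
		.notifier_call	= my_ct_event,
	};

	/* ip_conntrack_register_notifier(&my_ct_nb) at module init,
	 * ip_conntrack_unregister_notifier(&my_ct_nb) at exit */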