author	Pablo Neira Ayuso <pablo@netfilter.org>	2009-06-13 06:26:29 -0400
committer	Patrick McHardy <kaber@trash.net>	2009-06-13 06:26:29 -0400
commit	a0891aa6a635f658f29bb061a00d6d3486941519 (patch)
tree	40df3898f9f2e0892683c482d81deec4fd5a9257 /net
parent	65cb9fda32be613216f601a330b311c3bd7a8436 (diff)
netfilter: conntrack: move event caching to conntrack extension infrastructure
This patch reworks the per-cpu event caching to use the conntrack
extension infrastructure. The main drawback is that we consume more
memory per conntrack if event delivery is enabled. This patch is
required by the reliable event delivery patch that follows it.

As a bonus, this patch allows you to enable/disable event delivery via
/proc/sys/net/netfilter/nf_conntrack_events at runtime, although you
can still disable event caching as a compile-time option.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
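The heart of the rework is visible in the nf_conntrack_ecache.c hunks
below: pending events become bits in a per-conntrack mask stored in the
extension area, producers OR bits in, and delivery atomically swaps the
mask out with xchg() so each event is reported exactly once. The
following is a minimal userspace sketch of that scheme, using C11
atomics in place of the kernel primitives; the names (ct_ecache,
cache_event, deliver_events) are illustrative only, not from the tree:

/*
 * Userspace model of the event-cache scheme (assumed names; the kernel
 * uses struct nf_conntrack_ecache and xchg() rather than C11 atomics).
 */
#include <stdio.h>
#include <stdatomic.h>

enum ct_events { IPCT_NEW, IPCT_RELATED, IPCT_DESTROY };

struct ct_ecache {
	atomic_ulong cache;	/* pending event bits, one per event type */
};

/* producer side: mark an event pending without delivering it yet */
static void cache_event(struct ct_ecache *e, unsigned int event)
{
	atomic_fetch_or(&e->cache, 1UL << event);
}

/* consumer side: grab and clear the mask in one atomic step, as the
 * patch does with events = xchg(&e->cache, 0), then report each bit */
static void deliver_events(struct ct_ecache *e)
{
	unsigned long events = atomic_exchange(&e->cache, 0);

	if (events & (1UL << IPCT_NEW))
		printf("event: new\n");
	if (events & (1UL << IPCT_DESTROY))
		printf("event: destroy\n");
}

int main(void)
{
	struct ct_ecache e = { 0 };

	cache_event(&e, IPCT_NEW);
	deliver_events(&e);	/* reports IPCT_NEW once */
	deliver_events(&e);	/* mask already cleared: reports nothing */
	return 0;
}

This is also why the ctnetlink changes below rewrite event tests from
"events & IPCT_FOO" to "events & (1 << IPCT_FOO)": the event values are
now bit positions in the cached mask rather than a bitmask themselves.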
Diffstat (limited to 'net')
-rw-r--r--	net/netfilter/nf_conntrack_core.c	15
-rw-r--r--	net/netfilter/nf_conntrack_ecache.c	185
-rw-r--r--	net/netfilter/nf_conntrack_netlink.c	49
3 files changed, 150 insertions, 99 deletions
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index d8dffe7ab509..bcacbb5373c3 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -39,6 +39,7 @@
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_conntrack_extend.h>
 #include <net/netfilter/nf_conntrack_acct.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
 #include <net/netfilter/nf_nat.h>
 #include <net/netfilter/nf_nat_core.h>
 
@@ -577,6 +578,7 @@ init_conntrack(struct net *net,
 	}
 
 	nf_ct_acct_ext_add(ct, GFP_ATOMIC);
+	nf_ct_ecache_ext_add(ct, GFP_ATOMIC);
 
 	spin_lock_bh(&nf_conntrack_lock);
 	exp = nf_ct_find_expectation(net, tuple);
@@ -1031,8 +1033,6 @@ static void nf_conntrack_cleanup_init_net(void)
 
 static void nf_conntrack_cleanup_net(struct net *net)
 {
-	nf_ct_event_cache_flush(net);
-	nf_conntrack_ecache_fini(net);
  i_see_dead_people:
 	nf_ct_iterate_cleanup(net, kill_all, NULL);
 	if (atomic_read(&net->ct.count) != 0) {
@@ -1045,6 +1045,7 @@ static void nf_conntrack_cleanup_net(struct net *net)
 
 	nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
 			     nf_conntrack_htable_size);
+	nf_conntrack_ecache_fini(net);
 	nf_conntrack_acct_fini(net);
 	nf_conntrack_expect_fini(net);
 	free_percpu(net->ct.stat);
@@ -1220,9 +1221,6 @@ static int nf_conntrack_init_net(struct net *net)
 		ret = -ENOMEM;
 		goto err_stat;
 	}
-	ret = nf_conntrack_ecache_init(net);
-	if (ret < 0)
-		goto err_ecache;
 	net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size,
 					     &net->ct.hash_vmalloc, 1);
 	if (!net->ct.hash) {
@@ -1236,6 +1234,9 @@ static int nf_conntrack_init_net(struct net *net)
 	ret = nf_conntrack_acct_init(net);
 	if (ret < 0)
 		goto err_acct;
+	ret = nf_conntrack_ecache_init(net);
+	if (ret < 0)
+		goto err_ecache;
 
 	/* Set up fake conntrack:
 	    - to never be deleted, not in any hashes */
@@ -1248,14 +1249,14 @@ static int nf_conntrack_init_net(struct net *net)
 
 	return 0;
 
+err_ecache:
+	nf_conntrack_acct_fini(net);
 err_acct:
 	nf_conntrack_expect_fini(net);
 err_expect:
 	nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
 			     nf_conntrack_htable_size);
 err_hash:
-	nf_conntrack_ecache_fini(net);
-err_ecache:
 	free_percpu(net->ct.stat);
 err_stat:
 	return ret;
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index 5516b3e64b43..683281b78047 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -21,6 +21,7 @@
 
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_extend.h>
 
 static DEFINE_MUTEX(nf_ct_ecache_mutex);
 
@@ -32,94 +33,38 @@ EXPORT_SYMBOL_GPL(nf_expect_event_cb);
 
 /* deliver cached events and clear cache entry - must be called with locally
  * disabled softirqs */
-static inline void
-__nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache)
+void nf_ct_deliver_cached_events(struct nf_conn *ct)
 {
+	unsigned long events;
 	struct nf_ct_event_notifier *notify;
+	struct nf_conntrack_ecache *e;
 
 	rcu_read_lock();
 	notify = rcu_dereference(nf_conntrack_event_cb);
 	if (notify == NULL)
 		goto out_unlock;
 
-	if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct)
-	    && ecache->events) {
+	e = nf_ct_ecache_find(ct);
+	if (e == NULL)
+		goto out_unlock;
+
+	events = xchg(&e->cache, 0);
+
+	if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct) && events) {
 		struct nf_ct_event item = {
-			.ct	= ecache->ct,
+			.ct	= ct,
 			.pid	= 0,
 			.report	= 0
 		};
 
-		notify->fcn(ecache->events, &item);
+		notify->fcn(events, &item);
 	}
 
-	ecache->events = 0;
-	nf_ct_put(ecache->ct);
-	ecache->ct = NULL;
-
 out_unlock:
 	rcu_read_unlock();
 }
-
-/* Deliver all cached events for a particular conntrack. This is called
- * by code prior to async packet handling for freeing the skb */
-void nf_ct_deliver_cached_events(const struct nf_conn *ct)
-{
-	struct net *net = nf_ct_net(ct);
-	struct nf_conntrack_ecache *ecache;
-
-	local_bh_disable();
-	ecache = per_cpu_ptr(net->ct.ecache, raw_smp_processor_id());
-	if (ecache->ct == ct)
-		__nf_ct_deliver_cached_events(ecache);
-	local_bh_enable();
-}
 EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);
 
-/* Deliver cached events for old pending events, if current conntrack != old */
-void __nf_ct_event_cache_init(struct nf_conn *ct)
-{
-	struct net *net = nf_ct_net(ct);
-	struct nf_conntrack_ecache *ecache;
-
-	/* take care of delivering potentially old events */
-	ecache = per_cpu_ptr(net->ct.ecache, raw_smp_processor_id());
-	BUG_ON(ecache->ct == ct);
-	if (ecache->ct)
-		__nf_ct_deliver_cached_events(ecache);
-	/* initialize for this conntrack/packet */
-	ecache->ct = ct;
-	nf_conntrack_get(&ct->ct_general);
-}
-EXPORT_SYMBOL_GPL(__nf_ct_event_cache_init);
-
-/* flush the event cache - touches other CPU's data and must not be called
- * while packets are still passing through the code */
-void nf_ct_event_cache_flush(struct net *net)
-{
-	struct nf_conntrack_ecache *ecache;
-	int cpu;
-
-	for_each_possible_cpu(cpu) {
-		ecache = per_cpu_ptr(net->ct.ecache, cpu);
-		if (ecache->ct)
-			nf_ct_put(ecache->ct);
-	}
-}
-
-int nf_conntrack_ecache_init(struct net *net)
-{
-	net->ct.ecache = alloc_percpu(struct nf_conntrack_ecache);
-	if (!net->ct.ecache)
-		return -ENOMEM;
-	return 0;
-}
-
-void nf_conntrack_ecache_fini(struct net *net)
-{
-	free_percpu(net->ct.ecache);
-}
-
 int nf_conntrack_register_notifier(struct nf_ct_event_notifier *new)
 {
 	int ret = 0;
@@ -185,3 +130,107 @@ void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *new)
 	mutex_unlock(&nf_ct_ecache_mutex);
 }
 EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);
+
+#define NF_CT_EVENTS_DEFAULT 1
+static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT;
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table event_sysctl_table[] = {
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "nf_conntrack_events",
+		.data		= &init_net.ct.sysctl_events,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{}
+};
+#endif /* CONFIG_SYSCTL */
+
+static struct nf_ct_ext_type event_extend __read_mostly = {
+	.len	= sizeof(struct nf_conntrack_ecache),
+	.align	= __alignof__(struct nf_conntrack_ecache),
+	.id	= NF_CT_EXT_ECACHE,
+};
+
+#ifdef CONFIG_SYSCTL
+static int nf_conntrack_event_init_sysctl(struct net *net)
+{
+	struct ctl_table *table;
+
+	table = kmemdup(event_sysctl_table, sizeof(event_sysctl_table),
+			GFP_KERNEL);
+	if (!table)
+		goto out;
+
+	table[0].data = &net->ct.sysctl_events;
+
+	net->ct.event_sysctl_header =
+		register_net_sysctl_table(net,
+					  nf_net_netfilter_sysctl_path, table);
+	if (!net->ct.event_sysctl_header) {
+		printk(KERN_ERR "nf_ct_event: can't register to sysctl.\n");
+		goto out_register;
+	}
+	return 0;
+
+out_register:
+	kfree(table);
+out:
+	return -ENOMEM;
+}
+
+static void nf_conntrack_event_fini_sysctl(struct net *net)
+{
+	struct ctl_table *table;
+
+	table = net->ct.event_sysctl_header->ctl_table_arg;
+	unregister_net_sysctl_table(net->ct.event_sysctl_header);
+	kfree(table);
+}
+#else
+static int nf_conntrack_event_init_sysctl(struct net *net)
+{
+	return 0;
+}
+
+static void nf_conntrack_event_fini_sysctl(struct net *net)
+{
+}
+#endif /* CONFIG_SYSCTL */
+
+int nf_conntrack_ecache_init(struct net *net)
+{
+	int ret;
+
+	net->ct.sysctl_events = nf_ct_events;
+
+	if (net_eq(net, &init_net)) {
+		ret = nf_ct_extend_register(&event_extend);
+		if (ret < 0) {
+			printk(KERN_ERR "nf_ct_event: Unable to register "
+					"event extension.\n");
+			goto out_extend_register;
+		}
+	}
+
+	ret = nf_conntrack_event_init_sysctl(net);
+	if (ret < 0)
+		goto out_sysctl;
+
+	return 0;
+
+out_sysctl:
+	if (net_eq(net, &init_net))
+		nf_ct_extend_unregister(&event_extend);
+out_extend_register:
+	return ret;
+}
+
+void nf_conntrack_ecache_fini(struct net *net)
+{
+	nf_conntrack_event_fini_sysctl(net);
+	if (net_eq(net, &init_net))
+		nf_ct_extend_unregister(&event_extend);
+}
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 4e503ada5728..19706eff1647 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -468,10 +468,10 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
468 if (ct == &nf_conntrack_untracked) 468 if (ct == &nf_conntrack_untracked)
469 return 0; 469 return 0;
470 470
471 if (events & IPCT_DESTROY) { 471 if (events & (1 << IPCT_DESTROY)) {
472 type = IPCTNL_MSG_CT_DELETE; 472 type = IPCTNL_MSG_CT_DELETE;
473 group = NFNLGRP_CONNTRACK_DESTROY; 473 group = NFNLGRP_CONNTRACK_DESTROY;
474 } else if (events & (IPCT_NEW | IPCT_RELATED)) { 474 } else if (events & ((1 << IPCT_NEW) | (1 << IPCT_RELATED))) {
475 type = IPCTNL_MSG_CT_NEW; 475 type = IPCTNL_MSG_CT_NEW;
476 flags = NLM_F_CREATE|NLM_F_EXCL; 476 flags = NLM_F_CREATE|NLM_F_EXCL;
477 group = NFNLGRP_CONNTRACK_NEW; 477 group = NFNLGRP_CONNTRACK_NEW;
@@ -519,7 +519,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
519 if (ctnetlink_dump_status(skb, ct) < 0) 519 if (ctnetlink_dump_status(skb, ct) < 0)
520 goto nla_put_failure; 520 goto nla_put_failure;
521 521
522 if (events & IPCT_DESTROY) { 522 if (events & (1 << IPCT_DESTROY)) {
523 if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || 523 if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
524 ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0) 524 ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)
525 goto nla_put_failure; 525 goto nla_put_failure;
@@ -527,31 +527,31 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
527 if (ctnetlink_dump_timeout(skb, ct) < 0) 527 if (ctnetlink_dump_timeout(skb, ct) < 0)
528 goto nla_put_failure; 528 goto nla_put_failure;
529 529
530 if (events & IPCT_PROTOINFO 530 if (events & (1 << IPCT_PROTOINFO)
531 && ctnetlink_dump_protoinfo(skb, ct) < 0) 531 && ctnetlink_dump_protoinfo(skb, ct) < 0)
532 goto nla_put_failure; 532 goto nla_put_failure;
533 533
534 if ((events & IPCT_HELPER || nfct_help(ct)) 534 if ((events & (1 << IPCT_HELPER) || nfct_help(ct))
535 && ctnetlink_dump_helpinfo(skb, ct) < 0) 535 && ctnetlink_dump_helpinfo(skb, ct) < 0)
536 goto nla_put_failure; 536 goto nla_put_failure;
537 537
538#ifdef CONFIG_NF_CONNTRACK_SECMARK 538#ifdef CONFIG_NF_CONNTRACK_SECMARK
539 if ((events & IPCT_SECMARK || ct->secmark) 539 if ((events & (1 << IPCT_SECMARK) || ct->secmark)
540 && ctnetlink_dump_secmark(skb, ct) < 0) 540 && ctnetlink_dump_secmark(skb, ct) < 0)
541 goto nla_put_failure; 541 goto nla_put_failure;
542#endif 542#endif
543 543
544 if (events & IPCT_RELATED && 544 if (events & (1 << IPCT_RELATED) &&
545 ctnetlink_dump_master(skb, ct) < 0) 545 ctnetlink_dump_master(skb, ct) < 0)
546 goto nla_put_failure; 546 goto nla_put_failure;
547 547
548 if (events & IPCT_NATSEQADJ && 548 if (events & (1 << IPCT_NATSEQADJ) &&
549 ctnetlink_dump_nat_seq_adj(skb, ct) < 0) 549 ctnetlink_dump_nat_seq_adj(skb, ct) < 0)
550 goto nla_put_failure; 550 goto nla_put_failure;
551 } 551 }
552 552
553#ifdef CONFIG_NF_CONNTRACK_MARK 553#ifdef CONFIG_NF_CONNTRACK_MARK
554 if ((events & IPCT_MARK || ct->mark) 554 if ((events & (1 << IPCT_MARK) || ct->mark)
555 && ctnetlink_dump_mark(skb, ct) < 0) 555 && ctnetlink_dump_mark(skb, ct) < 0)
556 goto nla_put_failure; 556 goto nla_put_failure;
557#endif 557#endif
@@ -1253,6 +1253,7 @@ ctnetlink_create_conntrack(struct nlattr *cda[],
1253 } 1253 }
1254 1254
1255 nf_ct_acct_ext_add(ct, GFP_ATOMIC); 1255 nf_ct_acct_ext_add(ct, GFP_ATOMIC);
1256 nf_ct_ecache_ext_add(ct, GFP_ATOMIC);
1256 1257
1257#if defined(CONFIG_NF_CONNTRACK_MARK) 1258#if defined(CONFIG_NF_CONNTRACK_MARK)
1258 if (cda[CTA_MARK]) 1259 if (cda[CTA_MARK])
@@ -1340,13 +1341,13 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
1340 else 1341 else
1341 events = IPCT_NEW; 1342 events = IPCT_NEW;
1342 1343
1343 nf_conntrack_event_report(IPCT_STATUS | 1344 nf_conntrack_eventmask_report((1 << IPCT_STATUS) |
1344 IPCT_HELPER | 1345 (1 << IPCT_HELPER) |
1345 IPCT_PROTOINFO | 1346 (1 << IPCT_PROTOINFO) |
1346 IPCT_NATSEQADJ | 1347 (1 << IPCT_NATSEQADJ) |
1347 IPCT_MARK | events, 1348 (1 << IPCT_MARK) | events,
1348 ct, NETLINK_CB(skb).pid, 1349 ct, NETLINK_CB(skb).pid,
1349 nlmsg_report(nlh)); 1350 nlmsg_report(nlh));
1350 nf_ct_put(ct); 1351 nf_ct_put(ct);
1351 } else 1352 } else
1352 spin_unlock_bh(&nf_conntrack_lock); 1353 spin_unlock_bh(&nf_conntrack_lock);
@@ -1365,13 +1366,13 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
1365 if (err == 0) { 1366 if (err == 0) {
1366 nf_conntrack_get(&ct->ct_general); 1367 nf_conntrack_get(&ct->ct_general);
1367 spin_unlock_bh(&nf_conntrack_lock); 1368 spin_unlock_bh(&nf_conntrack_lock);
1368 nf_conntrack_event_report(IPCT_STATUS | 1369 nf_conntrack_eventmask_report((1 << IPCT_STATUS) |
1369 IPCT_HELPER | 1370 (1 << IPCT_HELPER) |
1370 IPCT_PROTOINFO | 1371 (1 << IPCT_PROTOINFO) |
1371 IPCT_NATSEQADJ | 1372 (1 << IPCT_NATSEQADJ) |
1372 IPCT_MARK, 1373 (1 << IPCT_MARK),
1373 ct, NETLINK_CB(skb).pid, 1374 ct, NETLINK_CB(skb).pid,
1374 nlmsg_report(nlh)); 1375 nlmsg_report(nlh));
1375 nf_ct_put(ct); 1376 nf_ct_put(ct);
1376 } else 1377 } else
1377 spin_unlock_bh(&nf_conntrack_lock); 1378 spin_unlock_bh(&nf_conntrack_lock);
@@ -1515,7 +1516,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
1515 unsigned int type; 1516 unsigned int type;
1516 int flags = 0; 1517 int flags = 0;
1517 1518
1518 if (events & IPEXP_NEW) { 1519 if (events & (1 << IPEXP_NEW)) {
1519 type = IPCTNL_MSG_EXP_NEW; 1520 type = IPCTNL_MSG_EXP_NEW;
1520 flags = NLM_F_CREATE|NLM_F_EXCL; 1521 flags = NLM_F_CREATE|NLM_F_EXCL;
1521 } else 1522 } else
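
The nf_conntrack_events sysctl registered above is a plain integer
handled by proc_dointvec, so the runtime toggle described in the commit
message can be exercised from userspace. A hypothetical sketch
(requires root; the path comes from the sysctl registration above, and
0/1 are assumed to map to disabled/enabled as the commit message
describes):

/* toggle conntrack event delivery at runtime (illustrative sketch) */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/net/netfilter/nf_conntrack_events", "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	fputs("0\n", f);	/* 0 disables delivery; write 1 to re-enable */
	fclose(f);
	return 0;
}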