Diffstat:
 -rw-r--r--  include/linux/list_nulls.h                    18
 -rw-r--r--  include/net/netfilter/nf_conntrack.h           2
 -rw-r--r--  include/net/netfilter/nf_conntrack_ecache.h  175
 -rw-r--r--  include/net/netfilter/nf_conntrack_extend.h    2
 -rw-r--r--  include/net/netfilter/nf_conntrack_helper.h    2
 -rw-r--r--  include/net/netns/conntrack.h                  7
 -rw-r--r--  net/netfilter/nf_conntrack_core.c            132
 -rw-r--r--  net/netfilter/nf_conntrack_ecache.c          209
 -rw-r--r--  net/netfilter/nf_conntrack_helper.c           14
 -rw-r--r--  net/netfilter/nf_conntrack_netlink.c          68
 -rw-r--r--  net/netfilter/nf_log.c                         6
 -rw-r--r--  net/netfilter/x_tables.c                      16
 12 files changed, 431 insertions, 220 deletions
diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h
index 93150ecf3ea4..5d10ae364b5e 100644
--- a/include/linux/list_nulls.h
+++ b/include/linux/list_nulls.h
@@ -56,6 +56,18 @@ static inline int hlist_nulls_empty(const struct hlist_nulls_head *h)
 	return is_a_nulls(h->first);
 }
 
+static inline void hlist_nulls_add_head(struct hlist_nulls_node *n,
+					struct hlist_nulls_head *h)
+{
+	struct hlist_nulls_node *first = h->first;
+
+	n->next = first;
+	n->pprev = &h->first;
+	h->first = n;
+	if (!is_a_nulls(first))
+		first->pprev = &n->next;
+}
+
 static inline void __hlist_nulls_del(struct hlist_nulls_node *n)
 {
 	struct hlist_nulls_node *next = n->next;
@@ -65,6 +77,12 @@ static inline void __hlist_nulls_del(struct hlist_nulls_node *n)
 	next->pprev = pprev;
 }
 
+static inline void hlist_nulls_del(struct hlist_nulls_node *n)
+{
+	__hlist_nulls_del(n);
+	n->pprev = LIST_POISON2;
+}
+
 /**
  * hlist_nulls_for_each_entry	- iterate over list of given type
  * @tpos:	the type * to use as a loop cursor.
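
For reference, the nulls-marker invariant these new helpers preserve can be exercised in isolation. Below is a minimal userspace sketch (not part of the patch; the kernel structure names are reused for illustration and the marker value 7 is arbitrary) showing why hlist_nulls_add_head() only fixes up ->pprev when the old first element is a real node:

/* A "nulls" list ends in a marker with bit 0 set instead of NULL,
 * so the end-of-list test is a simple low-bit check. */
#include <assert.h>

struct hlist_nulls_node {
	struct hlist_nulls_node *next, **pprev;
};
struct hlist_nulls_head {
	struct hlist_nulls_node *first;
};

#define NULLS_MARKER(v)	((struct hlist_nulls_node *)(2UL * (v) + 1UL))

static int is_a_nulls(const struct hlist_nulls_node *p)
{
	return (unsigned long)p & 1UL;
}

static void hlist_nulls_add_head(struct hlist_nulls_node *n,
				 struct hlist_nulls_head *h)
{
	struct hlist_nulls_node *first = h->first;

	n->next = first;
	n->pprev = &h->first;
	h->first = n;
	if (!is_a_nulls(first))	/* the nulls marker has no pprev to fix up */
		first->pprev = &n->next;
}

int main(void)
{
	struct hlist_nulls_head head = { .first = NULLS_MARKER(7) };
	struct hlist_nulls_node a, b;

	hlist_nulls_add_head(&a, &head);
	hlist_nulls_add_head(&b, &head);
	assert(head.first == &b && b.next == &a);
	assert(is_a_nulls(a.next));	/* list still ends in the marker */
	return 0;
}
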
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index ecc79f959076..a632689b61b4 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -201,6 +201,8 @@ extern struct nf_conntrack_tuple_hash *
 __nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple);
 
 extern void nf_conntrack_hash_insert(struct nf_conn *ct);
+extern void nf_ct_delete_from_lists(struct nf_conn *ct);
+extern void nf_ct_insert_dying_list(struct nf_conn *ct);
 
 extern void nf_conntrack_flush_report(struct net *net, u32 pid, int report);
 
diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index 1afb907e015a..4f20d58e2ab7 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -6,61 +6,54 @@
 #define _NF_CONNTRACK_ECACHE_H
 #include <net/netfilter/nf_conntrack.h>
 
-#include <linux/interrupt.h>
 #include <net/net_namespace.h>
 #include <net/netfilter/nf_conntrack_expect.h>
+#include <linux/netfilter/nf_conntrack_common.h>
+#include <linux/netfilter/nf_conntrack_tuple_common.h>
+#include <net/netfilter/nf_conntrack_extend.h>
 
-/* Connection tracking event bits */
+/* Connection tracking event types */
 enum ip_conntrack_events
 {
-	/* New conntrack */
-	IPCT_NEW_BIT = 0,
-	IPCT_NEW = (1 << IPCT_NEW_BIT),
-
-	/* Expected connection */
-	IPCT_RELATED_BIT = 1,
-	IPCT_RELATED = (1 << IPCT_RELATED_BIT),
-
-	/* Destroyed conntrack */
-	IPCT_DESTROY_BIT = 2,
-	IPCT_DESTROY = (1 << IPCT_DESTROY_BIT),
-
-	/* Status has changed */
-	IPCT_STATUS_BIT = 3,
-	IPCT_STATUS = (1 << IPCT_STATUS_BIT),
+	IPCT_NEW	= 0,	/* new conntrack */
+	IPCT_RELATED	= 1,	/* related conntrack */
+	IPCT_DESTROY	= 2,	/* destroyed conntrack */
+	IPCT_STATUS	= 3,	/* status has changed */
+	IPCT_PROTOINFO	= 4,	/* protocol information has changed */
+	IPCT_HELPER	= 5,	/* new helper has been set */
+	IPCT_MARK	= 6,	/* new mark has been set */
+	IPCT_NATSEQADJ	= 7,	/* NAT is doing sequence adjustment */
+	IPCT_SECMARK	= 8,	/* new security mark has been set */
+};
 
-	/* Update of protocol info */
-	IPCT_PROTOINFO_BIT = 4,
-	IPCT_PROTOINFO = (1 << IPCT_PROTOINFO_BIT),
+enum ip_conntrack_expect_events {
+	IPEXP_NEW	= 0,	/* new expectation */
+};
 
-	/* New helper for conntrack */
-	IPCT_HELPER_BIT = 5,
-	IPCT_HELPER = (1 << IPCT_HELPER_BIT),
+struct nf_conntrack_ecache {
+	unsigned long cache;	/* bitops want long */
+	unsigned long missed;	/* missed events */
+	u32 pid;		/* netlink pid of destroyer */
+};
 
-	/* Mark is set */
-	IPCT_MARK_BIT = 6,
-	IPCT_MARK = (1 << IPCT_MARK_BIT),
+static inline struct nf_conntrack_ecache *
+nf_ct_ecache_find(const struct nf_conn *ct)
+{
+	return nf_ct_ext_find(ct, NF_CT_EXT_ECACHE);
+}
 
-	/* NAT sequence adjustment */
-	IPCT_NATSEQADJ_BIT = 7,
-	IPCT_NATSEQADJ = (1 << IPCT_NATSEQADJ_BIT),
+static inline struct nf_conntrack_ecache *
+nf_ct_ecache_ext_add(struct nf_conn *ct, gfp_t gfp)
+{
+	struct net *net = nf_ct_net(ct);
 
-	/* Secmark is set */
-	IPCT_SECMARK_BIT = 8,
-	IPCT_SECMARK = (1 << IPCT_SECMARK_BIT),
-};
+	if (!net->ct.sysctl_events)
+		return NULL;
 
-enum ip_conntrack_expect_events {
-	IPEXP_NEW_BIT = 0,
-	IPEXP_NEW = (1 << IPEXP_NEW_BIT),
+	return nf_ct_ext_add(ct, NF_CT_EXT_ECACHE, gfp);
 };
 
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
-struct nf_conntrack_ecache {
-	struct nf_conn *ct;
-	unsigned int events;
-};
-
 /* This structure is passed to event handler */
 struct nf_ct_event {
 	struct nf_conn *ct;
@@ -76,53 +69,88 @@ extern struct nf_ct_event_notifier *nf_conntrack_event_cb;
 extern int nf_conntrack_register_notifier(struct nf_ct_event_notifier *nb);
 extern void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *nb);
 
-extern void nf_ct_deliver_cached_events(const struct nf_conn *ct);
-extern void __nf_ct_event_cache_init(struct nf_conn *ct);
-extern void nf_ct_event_cache_flush(struct net *net);
+extern void nf_ct_deliver_cached_events(struct nf_conn *ct);
 
 static inline void
 nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct)
 {
-	struct net *net = nf_ct_net(ct);
-	struct nf_conntrack_ecache *ecache;
-
-	local_bh_disable();
-	ecache = per_cpu_ptr(net->ct.ecache, raw_smp_processor_id());
-	if (ct != ecache->ct)
-		__nf_ct_event_cache_init(ct);
-	ecache->events |= event;
-	local_bh_enable();
+	struct nf_conntrack_ecache *e;
+
+	if (nf_conntrack_event_cb == NULL)
+		return;
+
+	e = nf_ct_ecache_find(ct);
+	if (e == NULL)
+		return;
+
+	set_bit(event, &e->cache);
 }
 
-static inline void
-nf_conntrack_event_report(enum ip_conntrack_events event,
-			  struct nf_conn *ct,
-			  u32 pid,
-			  int report)
+static inline int
+nf_conntrack_eventmask_report(unsigned int eventmask,
+			      struct nf_conn *ct,
+			      u32 pid,
+			      int report)
 {
+	int ret = 0;
+	struct net *net = nf_ct_net(ct);
 	struct nf_ct_event_notifier *notify;
+	struct nf_conntrack_ecache *e;
 
 	rcu_read_lock();
 	notify = rcu_dereference(nf_conntrack_event_cb);
 	if (notify == NULL)
 		goto out_unlock;
 
+	if (!net->ct.sysctl_events)
+		goto out_unlock;
+
+	e = nf_ct_ecache_find(ct);
+	if (e == NULL)
+		goto out_unlock;
+
 	if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct)) {
 		struct nf_ct_event item = {
 			.ct	= ct,
-			.pid	= pid,
+			.pid	= e->pid ? e->pid : pid,
 			.report	= report
 		};
-		notify->fcn(event, &item);
+		/* This is a resent of a destroy event? If so, skip missed */
+		unsigned long missed = e->pid ? 0 : e->missed;
+
+		ret = notify->fcn(eventmask | missed, &item);
+		if (unlikely(ret < 0 || missed)) {
+			spin_lock_bh(&ct->lock);
+			if (ret < 0) {
+				/* This is a destroy event that has been
+				 * triggered by a process, we store the PID
+				 * to include it in the retransmission. */
+				if (eventmask & (1 << IPCT_DESTROY) &&
+				    e->pid == 0 && pid != 0)
+					e->pid = pid;
+				else
+					e->missed |= eventmask;
+			} else
+				e->missed &= ~missed;
+			spin_unlock_bh(&ct->lock);
+		}
 	}
 out_unlock:
 	rcu_read_unlock();
+	return ret;
 }
 
-static inline void
+static inline int
+nf_conntrack_event_report(enum ip_conntrack_events event, struct nf_conn *ct,
+			  u32 pid, int report)
+{
+	return nf_conntrack_eventmask_report(1 << event, ct, pid, report);
+}
+
+static inline int
 nf_conntrack_event(enum ip_conntrack_events event, struct nf_conn *ct)
 {
-	nf_conntrack_event_report(event, ct, 0, 0);
+	return nf_conntrack_eventmask_report(1 << event, ct, 0, 0);
 }
 
 struct nf_exp_event {
@@ -145,6 +173,7 @@ nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
 			  u32 pid,
 			  int report)
 {
+	struct net *net = nf_ct_exp_net(exp);
 	struct nf_exp_event_notifier *notify;
 
 	rcu_read_lock();
@@ -152,13 +181,16 @@ nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
 	if (notify == NULL)
 		goto out_unlock;
 
+	if (!net->ct.sysctl_events)
+		goto out_unlock;
+
 	{
 		struct nf_exp_event item = {
 			.exp	= exp,
 			.pid	= pid,
 			.report = report
 		};
-		notify->fcn(event, &item);
+		notify->fcn(1 << event, &item);
 	}
 out_unlock:
 	rcu_read_unlock();
@@ -178,12 +210,16 @@ extern void nf_conntrack_ecache_fini(struct net *net);
 
 static inline void nf_conntrack_event_cache(enum ip_conntrack_events event,
 					    struct nf_conn *ct) {}
-static inline void nf_conntrack_event(enum ip_conntrack_events event,
-				      struct nf_conn *ct) {}
-static inline void nf_conntrack_event_report(enum ip_conntrack_events event,
-					     struct nf_conn *ct,
-					     u32 pid,
-					     int report) {}
+static inline int nf_conntrack_eventmask_report(unsigned int eventmask,
+						struct nf_conn *ct,
+						u32 pid,
+						int report) { return 0; }
+static inline int nf_conntrack_event(enum ip_conntrack_events event,
+				     struct nf_conn *ct) { return 0; }
+static inline int nf_conntrack_event_report(enum ip_conntrack_events event,
+					    struct nf_conn *ct,
+					    u32 pid,
+					    int report) { return 0; }
 static inline void nf_ct_deliver_cached_events(const struct nf_conn *ct) {}
 static inline void nf_ct_expect_event(enum ip_conntrack_expect_events event,
 				      struct nf_conntrack_expect *exp) {}
@@ -191,7 +227,6 @@ static inline void nf_ct_expect_event_report(enum ip_conntrack_expect_events e,
 					     struct nf_conntrack_expect *exp,
 					     u32 pid,
 					     int report) {}
-static inline void nf_ct_event_cache_flush(struct net *net) {}
 
 static inline int nf_conntrack_ecache_init(struct net *net)
 {
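
The switch from mask-valued IPCT_* constants to plain bit positions means callers now build masks with (1 << IPCT_*), and the per-conntrack extension tracks two bitmaps: ->cache for events queued since the last delivery and ->missed for events a listener failed to consume. A small userspace sketch of that bookkeeping (illustrative only; deliver() is a stand-in for the notifier callback, and the per-conntrack spinlock is omitted):

#include <stdio.h>

enum { IPCT_NEW = 0, IPCT_DESTROY = 2, IPCT_PROTOINFO = 4 };

struct ecache {
	unsigned long cache;	/* events queued since last delivery */
	unsigned long missed;	/* events a listener failed to consume */
};

/* pretend delivery fails on the first attempt */
static int deliver(unsigned long events)
{
	static int calls;
	(void)events;
	return calls++ == 0 ? -1 : 0;
}

static void deliver_cached(struct ecache *e)
{
	unsigned long events = e->cache, missed = e->missed;
	e->cache = 0;

	if (deliver(events | missed) < 0)
		e->missed |= events;	/* keep them for the next try */
	else
		e->missed &= ~missed;
}

int main(void)
{
	struct ecache e = { 0, 0 };

	e.cache |= 1UL << IPCT_NEW;	/* nf_conntrack_event_cache(IPCT_NEW) */
	deliver_cached(&e);		/* fails: events move to ->missed */
	printf("missed after failure: %#lx\n", e.missed);
	deliver_cached(&e);		/* succeeds: missed set is cleared */
	printf("missed after success: %#lx\n", e.missed);
	return 0;
}
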
diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h
index da8ee52613a5..7f8fc5d123c5 100644
--- a/include/net/netfilter/nf_conntrack_extend.h
+++ b/include/net/netfilter/nf_conntrack_extend.h
@@ -8,12 +8,14 @@ enum nf_ct_ext_id
 	NF_CT_EXT_HELPER,
 	NF_CT_EXT_NAT,
 	NF_CT_EXT_ACCT,
+	NF_CT_EXT_ECACHE,
 	NF_CT_EXT_NUM,
 };
 
 #define NF_CT_EXT_HELPER_TYPE struct nf_conn_help
 #define NF_CT_EXT_NAT_TYPE struct nf_conn_nat
 #define NF_CT_EXT_ACCT_TYPE struct nf_conn_counter
+#define NF_CT_EXT_ECACHE_TYPE struct nf_conntrack_ecache
 
 /* Extensions: optional stuff which isn't permanently in struct. */
 struct nf_ct_ext {
diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h
index ee2a4b369a04..1b7068000927 100644
--- a/include/net/netfilter/nf_conntrack_helper.h
+++ b/include/net/netfilter/nf_conntrack_helper.h
@@ -50,6 +50,8 @@ extern struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp);
 
 extern int __nf_ct_try_assign_helper(struct nf_conn *ct, gfp_t flags);
 
+extern void nf_ct_helper_destroy(struct nf_conn *ct);
+
 static inline struct nf_conn_help *nfct_help(const struct nf_conn *ct)
 {
 	return nf_ct_ext_find(ct, NF_CT_EXT_HELPER);
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 9dc58402bc09..ba1ba0c5efd1 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -14,16 +14,17 @@ struct netns_ct {
 	struct hlist_nulls_head	*hash;
 	struct hlist_head	*expect_hash;
 	struct hlist_nulls_head	unconfirmed;
+	struct hlist_nulls_head	dying;
 	struct ip_conntrack_stat *stat;
-#ifdef CONFIG_NF_CONNTRACK_EVENTS
-	struct nf_conntrack_ecache *ecache;
-#endif
+	int			sysctl_events;
+	unsigned int		sysctl_events_retry_timeout;
 	int			sysctl_acct;
 	int			sysctl_checksum;
 	unsigned int		sysctl_log_invalid; /* Log invalid packets */
 #ifdef CONFIG_SYSCTL
 	struct ctl_table_header	*sysctl_header;
 	struct ctl_table_header	*acct_sysctl_header;
+	struct ctl_table_header	*event_sysctl_header;
 #endif
 	int			hash_vmalloc;
 	int			expect_vmalloc;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index edf95695e0aa..5f72b94b4918 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -39,6 +39,7 @@
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_conntrack_extend.h>
 #include <net/netfilter/nf_conntrack_acct.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
 #include <net/netfilter/nf_nat.h>
 #include <net/netfilter/nf_nat_core.h>
 
@@ -182,10 +183,6 @@ destroy_conntrack(struct nf_conntrack *nfct)
 	NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
 	NF_CT_ASSERT(!timer_pending(&ct->timeout));
 
-	if (!test_bit(IPS_DYING_BIT, &ct->status))
-		nf_conntrack_event(IPCT_DESTROY, ct);
-	set_bit(IPS_DYING_BIT, &ct->status);
-
 	/* To make sure we don't get any weird locking issues here:
 	 * destroy_conntrack() MUST NOT be called with a write lock
 	 * to nf_conntrack_lock!!! -HW */
@@ -219,27 +216,70 @@ destroy_conntrack(struct nf_conntrack *nfct)
 	nf_conntrack_free(ct);
 }
 
-static void death_by_timeout(unsigned long ul_conntrack)
+void nf_ct_delete_from_lists(struct nf_conn *ct)
 {
-	struct nf_conn *ct = (void *)ul_conntrack;
 	struct net *net = nf_ct_net(ct);
-	struct nf_conn_help *help = nfct_help(ct);
-	struct nf_conntrack_helper *helper;
-
-	if (help) {
-		rcu_read_lock();
-		helper = rcu_dereference(help->helper);
-		if (helper && helper->destroy)
-			helper->destroy(ct);
-		rcu_read_unlock();
-	}
 
+	nf_ct_helper_destroy(ct);
 	spin_lock_bh(&nf_conntrack_lock);
 	/* Inside lock so preempt is disabled on module removal path.
 	 * Otherwise we can get spurious warnings. */
 	NF_CT_STAT_INC(net, delete_list);
 	clean_from_lists(ct);
 	spin_unlock_bh(&nf_conntrack_lock);
+}
+EXPORT_SYMBOL_GPL(nf_ct_delete_from_lists);
+
+static void death_by_event(unsigned long ul_conntrack)
+{
+	struct nf_conn *ct = (void *)ul_conntrack;
+	struct net *net = nf_ct_net(ct);
+
+	if (nf_conntrack_event(IPCT_DESTROY, ct) < 0) {
+		/* bad luck, let's retry again */
+		ct->timeout.expires = jiffies +
+			(random32() % net->ct.sysctl_events_retry_timeout);
+		add_timer(&ct->timeout);
+		return;
+	}
+	/* we've got the event delivered, now it's dying */
+	set_bit(IPS_DYING_BIT, &ct->status);
+	spin_lock(&nf_conntrack_lock);
+	hlist_nulls_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
+	spin_unlock(&nf_conntrack_lock);
+	nf_ct_put(ct);
+}
+
+void nf_ct_insert_dying_list(struct nf_conn *ct)
+{
+	struct net *net = nf_ct_net(ct);
+
+	/* add this conntrack to the dying list */
+	spin_lock_bh(&nf_conntrack_lock);
+	hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
+			     &net->ct.dying);
+	spin_unlock_bh(&nf_conntrack_lock);
+	/* set a new timer to retry event delivery */
+	setup_timer(&ct->timeout, death_by_event, (unsigned long)ct);
+	ct->timeout.expires = jiffies +
+		(random32() % net->ct.sysctl_events_retry_timeout);
+	add_timer(&ct->timeout);
+}
+EXPORT_SYMBOL_GPL(nf_ct_insert_dying_list);
+
+static void death_by_timeout(unsigned long ul_conntrack)
+{
+	struct nf_conn *ct = (void *)ul_conntrack;
+
+	if (!test_bit(IPS_DYING_BIT, &ct->status) &&
+	    unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) {
+		/* destroy event was not delivered */
+		nf_ct_delete_from_lists(ct);
+		nf_ct_insert_dying_list(ct);
+		return;
+	}
+	set_bit(IPS_DYING_BIT, &ct->status);
+	nf_ct_delete_from_lists(ct);
 	nf_ct_put(ct);
 }
 
@@ -577,6 +617,7 @@ init_conntrack(struct net *net,
 	}
 
 	nf_ct_acct_ext_add(ct, GFP_ATOMIC);
+	nf_ct_ecache_ext_add(ct, GFP_ATOMIC);
 
 	spin_lock_bh(&nf_conntrack_lock);
 	exp = nf_ct_find_expectation(net, tuple);
@@ -807,8 +848,6 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
 	NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
 	NF_CT_ASSERT(skb);
 
-	spin_lock_bh(&nf_conntrack_lock);
-
 	/* Only update if this is not a fixed timeout */
 	if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status))
 		goto acct;
@@ -822,11 +861,8 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
 		/* Only update the timeout if the new timeout is at least
 		   HZ jiffies from the old timeout. Need del_timer for race
 		   avoidance (may already be dying). */
-		if (newtime - ct->timeout.expires >= HZ
-		    && del_timer(&ct->timeout)) {
-			ct->timeout.expires = newtime;
-			add_timer(&ct->timeout);
-		}
+		if (newtime - ct->timeout.expires >= HZ)
+			mod_timer_pending(&ct->timeout, newtime);
 	}
 
 acct:
@@ -835,13 +871,13 @@ acct:
 
 		acct = nf_conn_acct_find(ct);
 		if (acct) {
+			spin_lock_bh(&ct->lock);
 			acct[CTINFO2DIR(ctinfo)].packets++;
 			acct[CTINFO2DIR(ctinfo)].bytes +=
 				skb->len - skb_network_offset(skb);
+			spin_unlock_bh(&ct->lock);
 		}
 	}
-
-	spin_unlock_bh(&nf_conntrack_lock);
 }
 EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
 
@@ -853,14 +889,14 @@ bool __nf_ct_kill_acct(struct nf_conn *ct,
 	if (do_acct) {
 		struct nf_conn_counter *acct;
 
-		spin_lock_bh(&nf_conntrack_lock);
 		acct = nf_conn_acct_find(ct);
 		if (acct) {
+			spin_lock_bh(&ct->lock);
 			acct[CTINFO2DIR(ctinfo)].packets++;
 			acct[CTINFO2DIR(ctinfo)].bytes +=
 				skb->len - skb_network_offset(skb);
+			spin_unlock_bh(&ct->lock);
 		}
-		spin_unlock_bh(&nf_conntrack_lock);
 	}
 
 	if (del_timer(&ct->timeout)) {
@@ -994,11 +1030,13 @@ static int kill_report(struct nf_conn *i, void *data)
 {
 	struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data;
 
-	/* get_next_corpse sets the dying bit for us */
-	nf_conntrack_event_report(IPCT_DESTROY,
-				  i,
-				  fr->pid,
-				  fr->report);
+	/* If we fail to deliver the event, death_by_timeout() will retry */
+	if (nf_conntrack_event_report(IPCT_DESTROY, i,
+				      fr->pid, fr->report) < 0)
+		return 1;
+
+	/* Avoid the delivery of the destroy event in death_by_timeout(). */
+	set_bit(IPS_DYING_BIT, &i->status);
 	return 1;
 }
 
1004 1042
@@ -1027,6 +1065,21 @@ void nf_conntrack_flush_report(struct net *net, u32 pid, int report)
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_flush_report);
 
+static void nf_ct_release_dying_list(void)
+{
+	struct nf_conntrack_tuple_hash *h;
+	struct nf_conn *ct;
+	struct hlist_nulls_node *n;
+
+	spin_lock_bh(&nf_conntrack_lock);
+	hlist_nulls_for_each_entry(h, n, &init_net.ct.dying, hnnode) {
+		ct = nf_ct_tuplehash_to_ctrack(h);
+		/* never fails to remove them, no listeners at this point */
+		nf_ct_kill(ct);
+	}
+	spin_unlock_bh(&nf_conntrack_lock);
+}
+
 static void nf_conntrack_cleanup_init_net(void)
 {
 	nf_conntrack_helper_fini();
@@ -1036,10 +1089,9 @@ static void nf_conntrack_cleanup_init_net(void)
 
 static void nf_conntrack_cleanup_net(struct net *net)
 {
-	nf_ct_event_cache_flush(net);
-	nf_conntrack_ecache_fini(net);
  i_see_dead_people:
 	nf_ct_iterate_cleanup(net, kill_all, NULL);
+	nf_ct_release_dying_list();
 	if (atomic_read(&net->ct.count) != 0) {
 		schedule();
 		goto i_see_dead_people;
@@ -1050,6 +1102,7 @@ static void nf_conntrack_cleanup_net(struct net *net)
 
 	nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
 			     nf_conntrack_htable_size);
+	nf_conntrack_ecache_fini(net);
 	nf_conntrack_acct_fini(net);
 	nf_conntrack_expect_fini(net);
 	free_percpu(net->ct.stat);
@@ -1220,14 +1273,12 @@ static int nf_conntrack_init_net(struct net *net)
 
 	atomic_set(&net->ct.count, 0);
 	INIT_HLIST_NULLS_HEAD(&net->ct.unconfirmed, 0);
+	INIT_HLIST_NULLS_HEAD(&net->ct.dying, 0);
 	net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
 	if (!net->ct.stat) {
 		ret = -ENOMEM;
 		goto err_stat;
 	}
-	ret = nf_conntrack_ecache_init(net);
-	if (ret < 0)
-		goto err_ecache;
 	net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size,
 					     &net->ct.hash_vmalloc, 1);
 	if (!net->ct.hash) {
@@ -1241,6 +1292,9 @@ static int nf_conntrack_init_net(struct net *net)
 	ret = nf_conntrack_acct_init(net);
 	if (ret < 0)
 		goto err_acct;
+	ret = nf_conntrack_ecache_init(net);
+	if (ret < 0)
+		goto err_ecache;
 
 	/* Set up fake conntrack:
 	    - to never be deleted, not in any hashes */
@@ -1253,14 +1307,14 @@ static int nf_conntrack_init_net(struct net *net)
 
 	return 0;
 
+err_ecache:
+	nf_conntrack_acct_fini(net);
 err_acct:
 	nf_conntrack_expect_fini(net);
 err_expect:
 	nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
 			     nf_conntrack_htable_size);
 err_hash:
-	nf_conntrack_ecache_fini(net);
-err_ecache:
 	free_percpu(net->ct.stat);
 err_stat:
 	return ret;
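
death_by_event() above re-arms the conntrack timer at jiffies plus a random offset within nf_conntrack_events_retry_timeout, so a burst of undeliverable destroy events is not retried in lockstep. A userspace sketch of that backoff (illustrative only; rand() stands in for random32() and a plain counter for jiffies):

#include <stdio.h>
#include <stdlib.h>

#define HZ 100
static unsigned long jiffies;	/* stand-in tick counter */

/* pick a retry tick somewhere inside the retry window */
static unsigned long next_retry(unsigned long retry_timeout)
{
	return jiffies + ((unsigned long)rand() % retry_timeout);
}

int main(void)
{
	unsigned long retry_timeout = 15 * HZ;	/* the sysctl default */

	srand(1);
	for (int i = 0; i < 4; i++)
		printf("retry #%d at tick %lu\n", i, next_retry(retry_timeout));
	return 0;
}
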
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index 5516b3e64b43..aee560b4768d 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -21,6 +21,7 @@
 
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_extend.h>
 
 static DEFINE_MUTEX(nf_ct_ecache_mutex);
 
@@ -32,94 +33,51 @@ EXPORT_SYMBOL_GPL(nf_expect_event_cb);
 
 /* deliver cached events and clear cache entry - must be called with locally
  * disabled softirqs */
-static inline void
-__nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache)
+void nf_ct_deliver_cached_events(struct nf_conn *ct)
 {
+	unsigned long events;
 	struct nf_ct_event_notifier *notify;
+	struct nf_conntrack_ecache *e;
 
 	rcu_read_lock();
 	notify = rcu_dereference(nf_conntrack_event_cb);
 	if (notify == NULL)
 		goto out_unlock;
 
-	if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct)
-	    && ecache->events) {
+	e = nf_ct_ecache_find(ct);
+	if (e == NULL)
+		goto out_unlock;
+
+	events = xchg(&e->cache, 0);
+
+	if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct) && events) {
 		struct nf_ct_event item = {
-			.ct	= ecache->ct,
+			.ct	= ct,
 			.pid	= 0,
 			.report	= 0
 		};
+		int ret;
+		/* We make a copy of the missed event cache without taking
+		 * the lock, thus we may send missed events twice. However,
+		 * this does not harm and it happens very rarely. */
+		unsigned long missed = e->missed;
 
-		notify->fcn(ecache->events, &item);
+		ret = notify->fcn(events | missed, &item);
+		if (unlikely(ret < 0 || missed)) {
+			spin_lock_bh(&ct->lock);
+			if (ret < 0)
+				e->missed |= events;
+			else
+				e->missed &= ~missed;
+			spin_unlock_bh(&ct->lock);
+		}
 	}
 
-	ecache->events = 0;
-	nf_ct_put(ecache->ct);
-	ecache->ct = NULL;
-
 out_unlock:
 	rcu_read_unlock();
 }
-
-/* Deliver all cached events for a particular conntrack. This is called
- * by code prior to async packet handling for freeing the skb */
-void nf_ct_deliver_cached_events(const struct nf_conn *ct)
-{
-	struct net *net = nf_ct_net(ct);
-	struct nf_conntrack_ecache *ecache;
-
-	local_bh_disable();
-	ecache = per_cpu_ptr(net->ct.ecache, raw_smp_processor_id());
-	if (ecache->ct == ct)
-		__nf_ct_deliver_cached_events(ecache);
-	local_bh_enable();
-}
 EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);
 
-/* Deliver cached events for old pending events, if current conntrack != old */
-void __nf_ct_event_cache_init(struct nf_conn *ct)
-{
-	struct net *net = nf_ct_net(ct);
-	struct nf_conntrack_ecache *ecache;
-
-	/* take care of delivering potentially old events */
-	ecache = per_cpu_ptr(net->ct.ecache, raw_smp_processor_id());
-	BUG_ON(ecache->ct == ct);
-	if (ecache->ct)
-		__nf_ct_deliver_cached_events(ecache);
-	/* initialize for this conntrack/packet */
-	ecache->ct = ct;
-	nf_conntrack_get(&ct->ct_general);
-}
-EXPORT_SYMBOL_GPL(__nf_ct_event_cache_init);
-
-/* flush the event cache - touches other CPU's data and must not be called
- * while packets are still passing through the code */
-void nf_ct_event_cache_flush(struct net *net)
-{
-	struct nf_conntrack_ecache *ecache;
-	int cpu;
-
-	for_each_possible_cpu(cpu) {
-		ecache = per_cpu_ptr(net->ct.ecache, cpu);
-		if (ecache->ct)
-			nf_ct_put(ecache->ct);
-	}
-}
-
-int nf_conntrack_ecache_init(struct net *net)
-{
-	net->ct.ecache = alloc_percpu(struct nf_conntrack_ecache);
-	if (!net->ct.ecache)
-		return -ENOMEM;
-	return 0;
-}
-
-void nf_conntrack_ecache_fini(struct net *net)
-{
-	free_percpu(net->ct.ecache);
-}
-
 int nf_conntrack_register_notifier(struct nf_ct_event_notifier *new)
 {
 	int ret = 0;
@@ -185,3 +143,118 @@ void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *new)
 	mutex_unlock(&nf_ct_ecache_mutex);
 }
 EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);
+
+#define NF_CT_EVENTS_DEFAULT 1
+static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT;
+static int nf_ct_events_retry_timeout __read_mostly = 15*HZ;
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table event_sysctl_table[] = {
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "nf_conntrack_events",
+		.data		= &init_net.ct.sysctl_events,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "nf_conntrack_events_retry_timeout",
+		.data		= &init_net.ct.sysctl_events_retry_timeout,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	{}
+};
+#endif /* CONFIG_SYSCTL */
+
+static struct nf_ct_ext_type event_extend __read_mostly = {
+	.len	= sizeof(struct nf_conntrack_ecache),
+	.align	= __alignof__(struct nf_conntrack_ecache),
+	.id	= NF_CT_EXT_ECACHE,
+};
+
+#ifdef CONFIG_SYSCTL
+static int nf_conntrack_event_init_sysctl(struct net *net)
+{
+	struct ctl_table *table;
+
+	table = kmemdup(event_sysctl_table, sizeof(event_sysctl_table),
+			GFP_KERNEL);
+	if (!table)
+		goto out;
+
+	table[0].data = &net->ct.sysctl_events;
+	table[1].data = &net->ct.sysctl_events_retry_timeout;
+
+	net->ct.event_sysctl_header =
+		register_net_sysctl_table(net,
+					  nf_net_netfilter_sysctl_path, table);
+	if (!net->ct.event_sysctl_header) {
+		printk(KERN_ERR "nf_ct_event: can't register to sysctl.\n");
+		goto out_register;
+	}
+	return 0;
+
+out_register:
+	kfree(table);
+out:
+	return -ENOMEM;
+}
+
+static void nf_conntrack_event_fini_sysctl(struct net *net)
+{
+	struct ctl_table *table;
+
+	table = net->ct.event_sysctl_header->ctl_table_arg;
+	unregister_net_sysctl_table(net->ct.event_sysctl_header);
+	kfree(table);
+}
+#else
+static int nf_conntrack_event_init_sysctl(struct net *net)
+{
+	return 0;
+}
+
+static void nf_conntrack_event_fini_sysctl(struct net *net)
+{
+}
+#endif /* CONFIG_SYSCTL */
+
+int nf_conntrack_ecache_init(struct net *net)
+{
+	int ret;
+
+	net->ct.sysctl_events = nf_ct_events;
+	net->ct.sysctl_events_retry_timeout = nf_ct_events_retry_timeout;
+
+	if (net_eq(net, &init_net)) {
+		ret = nf_ct_extend_register(&event_extend);
+		if (ret < 0) {
+			printk(KERN_ERR "nf_ct_event: Unable to register "
+					"event extension.\n");
+			goto out_extend_register;
+		}
+	}
+
+	ret = nf_conntrack_event_init_sysctl(net);
+	if (ret < 0)
+		goto out_sysctl;
+
+	return 0;
+
+out_sysctl:
+	if (net_eq(net, &init_net))
+		nf_ct_extend_unregister(&event_extend);
+out_extend_register:
+	return ret;
+}
+
+void nf_conntrack_ecache_fini(struct net *net)
+{
+	nf_conntrack_event_fini_sysctl(net);
+	if (net_eq(net, &init_net))
+		nf_ct_extend_unregister(&event_extend);
+}
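
nf_conntrack_event_init_sysctl() above follows the usual per-namespace pattern: duplicate a static template table with kmemdup() and point each entry's .data at the per-net fields before registering it. A userspace sketch of the same idea (illustrative only; malloc/memcpy stand in for kmemdup, and the struct names are simplified stand-ins):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct ctl_entry {
	const char *procname;
	void *data;
};

struct per_net {
	int sysctl_events;
	unsigned int sysctl_events_retry_timeout;
};

static const struct ctl_entry template[] = {
	{ "nf_conntrack_events", NULL },
	{ "nf_conntrack_events_retry_timeout", NULL },
};

int main(void)
{
	struct per_net net = { 1, 1500 };
	struct ctl_entry *table = malloc(sizeof(template));

	if (!table)
		return 1;
	memcpy(table, template, sizeof(template));	/* the kmemdup() step */
	table[0].data = &net.sysctl_events;		/* repoint at this net */
	table[1].data = &net.sysctl_events_retry_timeout;

	printf("%s -> %d\n", table[0].procname, *(int *)table[0].data);
	free(table);
	return 0;
}
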
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 0fa5a422959f..65c2a7bc3afc 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -136,6 +136,20 @@ static inline int unhelp(struct nf_conntrack_tuple_hash *i,
 	return 0;
 }
 
+void nf_ct_helper_destroy(struct nf_conn *ct)
+{
+	struct nf_conn_help *help = nfct_help(ct);
+	struct nf_conntrack_helper *helper;
+
+	if (help) {
+		rcu_read_lock();
+		helper = rcu_dereference(help->helper);
+		if (helper && helper->destroy)
+			helper->destroy(ct);
+		rcu_read_unlock();
+	}
+}
+
 int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
 {
 	unsigned int h = helper_hash(&me->tuple);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 4e503ada5728..49479d194570 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -463,15 +463,16 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
 	struct sk_buff *skb;
 	unsigned int type;
 	unsigned int flags = 0, group;
+	int err;
 
 	/* ignore our fake conntrack entry */
 	if (ct == &nf_conntrack_untracked)
 		return 0;
 
-	if (events & IPCT_DESTROY) {
+	if (events & (1 << IPCT_DESTROY)) {
 		type = IPCTNL_MSG_CT_DELETE;
 		group = NFNLGRP_CONNTRACK_DESTROY;
-	} else if (events & (IPCT_NEW | IPCT_RELATED)) {
+	} else if (events & ((1 << IPCT_NEW) | (1 << IPCT_RELATED))) {
 		type = IPCTNL_MSG_CT_NEW;
 		flags = NLM_F_CREATE|NLM_F_EXCL;
 		group = NFNLGRP_CONNTRACK_NEW;
@@ -519,7 +520,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
 	if (ctnetlink_dump_status(skb, ct) < 0)
 		goto nla_put_failure;
 
-	if (events & IPCT_DESTROY) {
+	if (events & (1 << IPCT_DESTROY)) {
 		if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
 		    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)
 			goto nla_put_failure;
@@ -527,38 +528,41 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
 		if (ctnetlink_dump_timeout(skb, ct) < 0)
 			goto nla_put_failure;
 
-		if (events & IPCT_PROTOINFO
+		if (events & (1 << IPCT_PROTOINFO)
 		    && ctnetlink_dump_protoinfo(skb, ct) < 0)
 			goto nla_put_failure;
 
-		if ((events & IPCT_HELPER || nfct_help(ct))
+		if ((events & (1 << IPCT_HELPER) || nfct_help(ct))
 		    && ctnetlink_dump_helpinfo(skb, ct) < 0)
 			goto nla_put_failure;
 
 #ifdef CONFIG_NF_CONNTRACK_SECMARK
-		if ((events & IPCT_SECMARK || ct->secmark)
+		if ((events & (1 << IPCT_SECMARK) || ct->secmark)
 		    && ctnetlink_dump_secmark(skb, ct) < 0)
 			goto nla_put_failure;
 #endif
 
-		if (events & IPCT_RELATED &&
+		if (events & (1 << IPCT_RELATED) &&
 		    ctnetlink_dump_master(skb, ct) < 0)
 			goto nla_put_failure;
 
-		if (events & IPCT_NATSEQADJ &&
+		if (events & (1 << IPCT_NATSEQADJ) &&
 		    ctnetlink_dump_nat_seq_adj(skb, ct) < 0)
 			goto nla_put_failure;
 	}
 
 #ifdef CONFIG_NF_CONNTRACK_MARK
-	if ((events & IPCT_MARK || ct->mark)
+	if ((events & (1 << IPCT_MARK) || ct->mark)
 	    && ctnetlink_dump_mark(skb, ct) < 0)
 		goto nla_put_failure;
 #endif
 	rcu_read_unlock();
 
 	nlmsg_end(skb, nlh);
-	nfnetlink_send(skb, item->pid, group, item->report, GFP_ATOMIC);
+	err = nfnetlink_send(skb, item->pid, group, item->report, GFP_ATOMIC);
+	if (err == -ENOBUFS || err == -EAGAIN)
+		return -ENOBUFS;
+
 	return 0;
 
 nla_put_failure:
@@ -798,10 +802,15 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
 		}
 	}
 
-	nf_conntrack_event_report(IPCT_DESTROY,
-				  ct,
-				  NETLINK_CB(skb).pid,
-				  nlmsg_report(nlh));
+	if (nf_conntrack_event_report(IPCT_DESTROY, ct,
+				      NETLINK_CB(skb).pid,
+				      nlmsg_report(nlh)) < 0) {
+		nf_ct_delete_from_lists(ct);
+		/* we failed to report the event, try later */
+		nf_ct_insert_dying_list(ct);
+		nf_ct_put(ct);
+		return 0;
+	}
 
 	/* death_by_timeout would report the event again */
 	set_bit(IPS_DYING_BIT, &ct->status);
@@ -1253,6 +1262,7 @@ ctnetlink_create_conntrack(struct nlattr *cda[],
 	}
 
 	nf_ct_acct_ext_add(ct, GFP_ATOMIC);
+	nf_ct_ecache_ext_add(ct, GFP_ATOMIC);
 
 #if defined(CONFIG_NF_CONNTRACK_MARK)
 	if (cda[CTA_MARK])
@@ -1340,13 +1350,13 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
 			else
 				events = IPCT_NEW;
 
-			nf_conntrack_event_report(IPCT_STATUS |
-						  IPCT_HELPER |
-						  IPCT_PROTOINFO |
-						  IPCT_NATSEQADJ |
-						  IPCT_MARK | events,
-						  ct, NETLINK_CB(skb).pid,
-						  nlmsg_report(nlh));
+			nf_conntrack_eventmask_report((1 << IPCT_STATUS) |
+						      (1 << IPCT_HELPER) |
+						      (1 << IPCT_PROTOINFO) |
+						      (1 << IPCT_NATSEQADJ) |
+						      (1 << IPCT_MARK) | events,
+						      ct, NETLINK_CB(skb).pid,
+						      nlmsg_report(nlh));
 			nf_ct_put(ct);
 		} else
 			spin_unlock_bh(&nf_conntrack_lock);
@@ -1365,13 +1375,13 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
 		if (err == 0) {
 			nf_conntrack_get(&ct->ct_general);
 			spin_unlock_bh(&nf_conntrack_lock);
-			nf_conntrack_event_report(IPCT_STATUS |
-						  IPCT_HELPER |
-						  IPCT_PROTOINFO |
-						  IPCT_NATSEQADJ |
-						  IPCT_MARK,
-						  ct, NETLINK_CB(skb).pid,
-						  nlmsg_report(nlh));
+			nf_conntrack_eventmask_report((1 << IPCT_STATUS) |
+						      (1 << IPCT_HELPER) |
+						      (1 << IPCT_PROTOINFO) |
+						      (1 << IPCT_NATSEQADJ) |
+						      (1 << IPCT_MARK),
+						      ct, NETLINK_CB(skb).pid,
+						      nlmsg_report(nlh));
 			nf_ct_put(ct);
 		} else
 			spin_unlock_bh(&nf_conntrack_lock);
@@ -1515,7 +1525,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
 	unsigned int type;
 	int flags = 0;
 
-	if (events & IPEXP_NEW) {
+	if (events & (1 << IPEXP_NEW)) {
 		type = IPCTNL_MSG_EXP_NEW;
 		flags = NLM_F_CREATE|NLM_F_EXCL;
 	} else
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index beb37311e1a5..2fefe147750a 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -248,14 +248,14 @@ static int nf_log_proc_dostring(ctl_table *table, int write, struct file *filp,
 		rcu_assign_pointer(nf_loggers[tindex], logger);
 		mutex_unlock(&nf_log_mutex);
 	} else {
-		rcu_read_lock();
-		logger = rcu_dereference(nf_loggers[tindex]);
+		mutex_lock(&nf_log_mutex);
+		logger = nf_loggers[tindex];
 		if (!logger)
 			table->data = "NONE";
 		else
 			table->data = logger->name;
 		r = proc_dostring(table, write, filp, buffer, lenp, ppos);
-		rcu_read_unlock();
+		mutex_unlock(&nf_log_mutex);
 	}
 
 	return r;
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 46dba5f043d5..025d1a0af78b 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -364,14 +364,14 @@ int xt_check_match(struct xt_mtchk_param *par,
 		 * ebt_among is exempt from centralized matchsize checking
 		 * because it uses a dynamic-size data set.
 		 */
-		printk("%s_tables: %s match: invalid size %Zu != %u\n",
+		pr_err("%s_tables: %s match: invalid size %Zu != %u\n",
 		       xt_prefix[par->family], par->match->name,
 		       XT_ALIGN(par->match->matchsize), size);
 		return -EINVAL;
 	}
 	if (par->match->table != NULL &&
 	    strcmp(par->match->table, par->table) != 0) {
-		printk("%s_tables: %s match: only valid in %s table, not %s\n",
+		pr_err("%s_tables: %s match: only valid in %s table, not %s\n",
 		       xt_prefix[par->family], par->match->name,
 		       par->match->table, par->table);
 		return -EINVAL;
@@ -379,7 +379,7 @@ int xt_check_match(struct xt_mtchk_param *par,
 	if (par->match->hooks && (par->hook_mask & ~par->match->hooks) != 0) {
 		char used[64], allow[64];
 
-		printk("%s_tables: %s match: used from hooks %s, but only "
+		pr_err("%s_tables: %s match: used from hooks %s, but only "
 		       "valid from %s\n",
 		       xt_prefix[par->family], par->match->name,
 		       textify_hooks(used, sizeof(used), par->hook_mask),
@@ -387,7 +387,7 @@ int xt_check_match(struct xt_mtchk_param *par,
 		return -EINVAL;
 	}
 	if (par->match->proto && (par->match->proto != proto || inv_proto)) {
-		printk("%s_tables: %s match: only valid for protocol %u\n",
+		pr_err("%s_tables: %s match: only valid for protocol %u\n",
 		       xt_prefix[par->family], par->match->name,
 		       par->match->proto);
 		return -EINVAL;
@@ -514,14 +514,14 @@ int xt_check_target(struct xt_tgchk_param *par,
 		    unsigned int size, u_int8_t proto, bool inv_proto)
 {
 	if (XT_ALIGN(par->target->targetsize) != size) {
-		printk("%s_tables: %s target: invalid size %Zu != %u\n",
+		pr_err("%s_tables: %s target: invalid size %Zu != %u\n",
 		       xt_prefix[par->family], par->target->name,
 		       XT_ALIGN(par->target->targetsize), size);
 		return -EINVAL;
 	}
 	if (par->target->table != NULL &&
 	    strcmp(par->target->table, par->table) != 0) {
-		printk("%s_tables: %s target: only valid in %s table, not %s\n",
+		pr_err("%s_tables: %s target: only valid in %s table, not %s\n",
 		       xt_prefix[par->family], par->target->name,
 		       par->target->table, par->table);
 		return -EINVAL;
@@ -529,7 +529,7 @@ int xt_check_target(struct xt_tgchk_param *par,
 	if (par->target->hooks && (par->hook_mask & ~par->target->hooks) != 0) {
 		char used[64], allow[64];
 
-		printk("%s_tables: %s target: used from hooks %s, but only "
+		pr_err("%s_tables: %s target: used from hooks %s, but only "
 		       "usable from %s\n",
 		       xt_prefix[par->family], par->target->name,
 		       textify_hooks(used, sizeof(used), par->hook_mask),
@@ -537,7 +537,7 @@ int xt_check_target(struct xt_tgchk_param *par,
 		return -EINVAL;
 	}
 	if (par->target->proto && (par->target->proto != proto || inv_proto)) {
-		printk("%s_tables: %s target: only valid for protocol %u\n",
+		pr_err("%s_tables: %s target: only valid for protocol %u\n",
 		       xt_prefix[par->family], par->target->name,
 		       par->target->proto);
 		return -EINVAL;