 include/net/netfilter/nf_conntrack.h        |  2
 include/net/netfilter/nf_conntrack_ecache.h | 61
 include/net/netns/conntrack.h               |  2
 net/netfilter/nf_conntrack_core.c           | 89
 net/netfilter/nf_conntrack_ecache.c         | 28
 net/netfilter/nf_conntrack_netlink.c        | 19
 6 files changed, 166 insertions(+), 35 deletions(-)
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index ecc79f959076..a632689b61b4 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -201,6 +201,8 @@ extern struct nf_conntrack_tuple_hash *
 __nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple);
 
 extern void nf_conntrack_hash_insert(struct nf_conn *ct);
+extern void nf_ct_delete_from_lists(struct nf_conn *ct);
+extern void nf_ct_insert_dying_list(struct nf_conn *ct);
 
 extern void nf_conntrack_flush_report(struct net *net, u32 pid, int report);
 
diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index e7ae297ba383..4f20d58e2ab7 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -32,6 +32,8 @@ enum ip_conntrack_expect_events {
 
 struct nf_conntrack_ecache {
         unsigned long cache;            /* bitops want long */
+        unsigned long missed;           /* missed events */
+        u32 pid;                        /* netlink pid of destroyer */
 };
 
 static inline struct nf_conntrack_ecache *
@@ -84,14 +86,16 @@ nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct)
         set_bit(event, &e->cache);
 }
 
-static inline void
+static inline int
 nf_conntrack_eventmask_report(unsigned int eventmask,
                               struct nf_conn *ct,
                               u32 pid,
                               int report)
 {
+        int ret = 0;
         struct net *net = nf_ct_net(ct);
         struct nf_ct_event_notifier *notify;
+        struct nf_conntrack_ecache *e;
 
         rcu_read_lock();
         notify = rcu_dereference(nf_conntrack_event_cb);
@@ -101,29 +105,52 @@ nf_conntrack_eventmask_report(unsigned int eventmask,
         if (!net->ct.sysctl_events)
                 goto out_unlock;
 
+        e = nf_ct_ecache_find(ct);
+        if (e == NULL)
+                goto out_unlock;
+
         if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct)) {
                 struct nf_ct_event item = {
                         .ct     = ct,
-                        .pid    = pid,
+                        .pid    = e->pid ? e->pid : pid,
                         .report = report
                 };
-                notify->fcn(eventmask, &item);
+                /* This is a resent of a destroy event? If so, skip missed */
+                unsigned long missed = e->pid ? 0 : e->missed;
+
+                ret = notify->fcn(eventmask | missed, &item);
+                if (unlikely(ret < 0 || missed)) {
+                        spin_lock_bh(&ct->lock);
+                        if (ret < 0) {
+                                /* This is a destroy event that has been
+                                 * triggered by a process, we store the PID
+                                 * to include it in the retransmission. */
+                                if (eventmask & (1 << IPCT_DESTROY) &&
+                                    e->pid == 0 && pid != 0)
+                                        e->pid = pid;
+                                else
+                                        e->missed |= eventmask;
+                        } else
+                                e->missed &= ~missed;
+                        spin_unlock_bh(&ct->lock);
+                }
         }
 out_unlock:
         rcu_read_unlock();
+        return ret;
 }
 
-static inline void
+static inline int
 nf_conntrack_event_report(enum ip_conntrack_events event, struct nf_conn *ct,
                           u32 pid, int report)
 {
-        nf_conntrack_eventmask_report(1 << event, ct, pid, report);
+        return nf_conntrack_eventmask_report(1 << event, ct, pid, report);
 }
 
-static inline void
+static inline int
 nf_conntrack_event(enum ip_conntrack_events event, struct nf_conn *ct)
 {
-        nf_conntrack_eventmask_report(1 << event, ct, 0, 0);
+        return nf_conntrack_eventmask_report(1 << event, ct, 0, 0);
 }
 
 struct nf_exp_event {
@@ -183,16 +210,16 @@ extern void nf_conntrack_ecache_fini(struct net *net);
 
 static inline void nf_conntrack_event_cache(enum ip_conntrack_events event,
                                             struct nf_conn *ct) {}
-static inline void nf_conntrack_eventmask_report(unsigned int eventmask,
-                                                 struct nf_conn *ct,
-                                                 u32 pid,
-                                                 int report) {}
-static inline void nf_conntrack_event(enum ip_conntrack_events event,
-                                      struct nf_conn *ct) {}
-static inline void nf_conntrack_event_report(enum ip_conntrack_events event,
-                                             struct nf_conn *ct,
-                                             u32 pid,
-                                             int report) {}
+static inline int nf_conntrack_eventmask_report(unsigned int eventmask,
+                                                struct nf_conn *ct,
+                                                u32 pid,
+                                                int report) { return 0; }
+static inline int nf_conntrack_event(enum ip_conntrack_events event,
+                                     struct nf_conn *ct) { return 0; }
+static inline int nf_conntrack_event_report(enum ip_conntrack_events event,
+                                            struct nf_conn *ct,
+                                            u32 pid,
+                                            int report) { return 0; }
 static inline void nf_ct_deliver_cached_events(const struct nf_conn *ct) {}
 static inline void nf_ct_expect_event(enum ip_conntrack_expect_events event,
                                       struct nf_conntrack_expect *exp) {}
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 505a51cd8c63..ba1ba0c5efd1 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -14,8 +14,10 @@ struct netns_ct {
         struct hlist_nulls_head *hash;
         struct hlist_head       *expect_hash;
         struct hlist_nulls_head unconfirmed;
+        struct hlist_nulls_head dying;
         struct ip_conntrack_stat *stat;
         int                     sysctl_events;
+        unsigned int            sysctl_events_retry_timeout;
         int                     sysctl_acct;
         int                     sysctl_checksum;
         unsigned int            sysctl_log_invalid; /* Log invalid packets */
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 14235b144cb5..5f72b94b4918 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -183,10 +183,6 @@ destroy_conntrack(struct nf_conntrack *nfct)
         NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
         NF_CT_ASSERT(!timer_pending(&ct->timeout));
 
-        if (!test_bit(IPS_DYING_BIT, &ct->status))
-                nf_conntrack_event(IPCT_DESTROY, ct);
-        set_bit(IPS_DYING_BIT, &ct->status);
-
         /* To make sure we don't get any weird locking issues here:
          * destroy_conntrack() MUST NOT be called with a write lock
          * to nf_conntrack_lock!!! -HW */
@@ -220,9 +216,8 @@ destroy_conntrack(struct nf_conntrack *nfct)
         nf_conntrack_free(ct);
 }
 
-static void death_by_timeout(unsigned long ul_conntrack)
+void nf_ct_delete_from_lists(struct nf_conn *ct)
 {
-        struct nf_conn *ct = (void *)ul_conntrack;
         struct net *net = nf_ct_net(ct);
 
         nf_ct_helper_destroy(ct);
@@ -232,6 +227,59 @@ static void death_by_timeout(unsigned long ul_conntrack)
         NF_CT_STAT_INC(net, delete_list);
         clean_from_lists(ct);
         spin_unlock_bh(&nf_conntrack_lock);
+}
+EXPORT_SYMBOL_GPL(nf_ct_delete_from_lists);
+
+static void death_by_event(unsigned long ul_conntrack)
+{
+        struct nf_conn *ct = (void *)ul_conntrack;
+        struct net *net = nf_ct_net(ct);
+
+        if (nf_conntrack_event(IPCT_DESTROY, ct) < 0) {
+                /* bad luck, let's retry again */
+                ct->timeout.expires = jiffies +
+                        (random32() % net->ct.sysctl_events_retry_timeout);
+                add_timer(&ct->timeout);
+                return;
+        }
+        /* we've got the event delivered, now it's dying */
+        set_bit(IPS_DYING_BIT, &ct->status);
+        spin_lock(&nf_conntrack_lock);
+        hlist_nulls_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
+        spin_unlock(&nf_conntrack_lock);
+        nf_ct_put(ct);
+}
+
+void nf_ct_insert_dying_list(struct nf_conn *ct)
+{
+        struct net *net = nf_ct_net(ct);
+
+        /* add this conntrack to the dying list */
+        spin_lock_bh(&nf_conntrack_lock);
+        hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
+                             &net->ct.dying);
+        spin_unlock_bh(&nf_conntrack_lock);
+        /* set a new timer to retry event delivery */
+        setup_timer(&ct->timeout, death_by_event, (unsigned long)ct);
+        ct->timeout.expires = jiffies +
+                (random32() % net->ct.sysctl_events_retry_timeout);
+        add_timer(&ct->timeout);
+}
+EXPORT_SYMBOL_GPL(nf_ct_insert_dying_list);
+
+static void death_by_timeout(unsigned long ul_conntrack)
+{
+        struct nf_conn *ct = (void *)ul_conntrack;
+
+        if (!test_bit(IPS_DYING_BIT, &ct->status) &&
+            unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) {
+                /* destroy event was not delivered */
+                nf_ct_delete_from_lists(ct);
+                nf_ct_insert_dying_list(ct);
+                return;
+        }
+        set_bit(IPS_DYING_BIT, &ct->status);
+        nf_ct_delete_from_lists(ct);
         nf_ct_put(ct);
 }
 
@@ -982,11 +1030,13 @@ static int kill_report(struct nf_conn *i, void *data)
 {
         struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data;
 
-        /* get_next_corpse sets the dying bit for us */
-        nf_conntrack_event_report(IPCT_DESTROY,
-                                  i,
-                                  fr->pid,
-                                  fr->report);
+        /* If we fail to deliver the event, death_by_timeout() will retry */
+        if (nf_conntrack_event_report(IPCT_DESTROY, i,
+                                      fr->pid, fr->report) < 0)
+                return 1;
+
+        /* Avoid the delivery of the destroy event in death_by_timeout(). */
+        set_bit(IPS_DYING_BIT, &i->status);
         return 1;
 }
 
@@ -1015,6 +1065,21 @@ void nf_conntrack_flush_report(struct net *net, u32 pid, int report)
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_flush_report);
 
+static void nf_ct_release_dying_list(void)
+{
+        struct nf_conntrack_tuple_hash *h;
+        struct nf_conn *ct;
+        struct hlist_nulls_node *n;
+
+        spin_lock_bh(&nf_conntrack_lock);
+        hlist_nulls_for_each_entry(h, n, &init_net.ct.dying, hnnode) {
+                ct = nf_ct_tuplehash_to_ctrack(h);
+                /* never fails to remove them, no listeners at this point */
+                nf_ct_kill(ct);
+        }
+        spin_unlock_bh(&nf_conntrack_lock);
+}
+
 static void nf_conntrack_cleanup_init_net(void)
 {
         nf_conntrack_helper_fini();
@@ -1026,6 +1091,7 @@ static void nf_conntrack_cleanup_net(struct net *net)
 {
  i_see_dead_people:
         nf_ct_iterate_cleanup(net, kill_all, NULL);
+        nf_ct_release_dying_list();
         if (atomic_read(&net->ct.count) != 0) {
                 schedule();
                 goto i_see_dead_people;
@@ -1207,6 +1273,7 @@ static int nf_conntrack_init_net(struct net *net)
 
         atomic_set(&net->ct.count, 0);
         INIT_HLIST_NULLS_HEAD(&net->ct.unconfirmed, 0);
+        INIT_HLIST_NULLS_HEAD(&net->ct.dying, 0);
         net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
         if (!net->ct.stat) {
                 ret = -ENOMEM;
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index 683281b78047..aee560b4768d 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -56,8 +56,21 @@ void nf_ct_deliver_cached_events(struct nf_conn *ct)
                         .pid    = 0,
                         .report = 0
                 };
-
-                notify->fcn(events, &item);
+                int ret;
+                /* We make a copy of the missed event cache without taking
+                 * the lock, thus we may send missed events twice. However,
+                 * this does not harm and it happens very rarely. */
+                unsigned long missed = e->missed;
+
+                ret = notify->fcn(events | missed, &item);
+                if (unlikely(ret < 0 || missed)) {
+                        spin_lock_bh(&ct->lock);
+                        if (ret < 0)
+                                e->missed |= events;
+                        else
+                                e->missed &= ~missed;
+                        spin_unlock_bh(&ct->lock);
+                }
         }
 
 out_unlock:
@@ -133,6 +146,7 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);
 
 #define NF_CT_EVENTS_DEFAULT 1
 static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT;
+static int nf_ct_events_retry_timeout __read_mostly = 15*HZ;
 
 #ifdef CONFIG_SYSCTL
 static struct ctl_table event_sysctl_table[] = {
@@ -144,6 +158,14 @@ static struct ctl_table event_sysctl_table[] = {
                 .mode           = 0644,
                 .proc_handler   = proc_dointvec,
         },
+        {
+                .ctl_name       = CTL_UNNUMBERED,
+                .procname       = "nf_conntrack_events_retry_timeout",
+                .data           = &init_net.ct.sysctl_events_retry_timeout,
+                .maxlen         = sizeof(unsigned int),
+                .mode           = 0644,
+                .proc_handler   = proc_dointvec_jiffies,
+        },
         {}
 };
 #endif /* CONFIG_SYSCTL */
@@ -165,6 +187,7 @@ static int nf_conntrack_event_init_sysctl(struct net *net)
                 goto out;
 
         table[0].data = &net->ct.sysctl_events;
+        table[1].data = &net->ct.sysctl_events_retry_timeout;
 
         net->ct.event_sysctl_header =
                 register_net_sysctl_table(net,
@@ -205,6 +228,7 @@ int nf_conntrack_ecache_init(struct net *net)
         int ret;
 
         net->ct.sysctl_events = nf_ct_events;
+        net->ct.sysctl_events_retry_timeout = nf_ct_events_retry_timeout;
 
         if (net_eq(net, &init_net)) {
                 ret = nf_ct_extend_register(&event_extend);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 19706eff1647..49479d194570 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -463,6 +463,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
         struct sk_buff *skb;
         unsigned int type;
         unsigned int flags = 0, group;
+        int err;
 
         /* ignore our fake conntrack entry */
         if (ct == &nf_conntrack_untracked)
@@ -558,7 +559,10 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
         rcu_read_unlock();
 
         nlmsg_end(skb, nlh);
-        nfnetlink_send(skb, item->pid, group, item->report, GFP_ATOMIC);
+        err = nfnetlink_send(skb, item->pid, group, item->report, GFP_ATOMIC);
+        if (err == -ENOBUFS || err == -EAGAIN)
+                return -ENOBUFS;
+
         return 0;
 
 nla_put_failure:
@@ -798,10 +802,15 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
                 }
         }
 
-        nf_conntrack_event_report(IPCT_DESTROY,
-                                  ct,
-                                  NETLINK_CB(skb).pid,
-                                  nlmsg_report(nlh));
+        if (nf_conntrack_event_report(IPCT_DESTROY, ct,
+                                      NETLINK_CB(skb).pid,
+                                      nlmsg_report(nlh)) < 0) {
+                nf_ct_delete_from_lists(ct);
+                /* we failed to report the event, try later */
+                nf_ct_insert_dying_list(ct);
+                nf_ct_put(ct);
+                return 0;
+        }
 
         /* death_by_timeout would report the event again */
         set_bit(IPS_DYING_BIT, &ct->status);
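
Note: the ctnetlink_conntrack_event() hunk above only sees a delivery failure (-ENOBUFS) if at least one listener has asked netlink to report broadcast errors back to the sender. The following is a hypothetical userspace listener sketch, not part of this patch; it assumes the NETLINK_BROADCAST_ERROR socket option (merged in 2.6.30) and the NFNLGRP_CONNTRACK_DESTROY group are available on the running kernel.

/* Hypothetical listener sketch: subscribe to conntrack DESTROY events and
 * opt in to broadcast error reporting, so that a full receive buffer makes
 * nfnetlink_send() in the patch above return -ENOBUFS and the conntrack
 * entry is parked on the dying list for a later retransmission. */
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/netfilter/nfnetlink.h>

#ifndef SOL_NETLINK
#define SOL_NETLINK 270
#endif

int main(void)
{
        struct sockaddr_nl addr;
        int on = 1;
        int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_NETFILTER);

        if (fd < 0)
                return 1;

        /* report -ENOBUFS on receive-queue overrun to the broadcasting side */
        setsockopt(fd, SOL_NETLINK, NETLINK_BROADCAST_ERROR, &on, sizeof(on));

        memset(&addr, 0, sizeof(addr));
        addr.nl_family = AF_NETLINK;
        /* legacy group bitmask: bit (group - 1) selects the DESTROY group */
        addr.nl_groups = 1 << (NFNLGRP_CONNTRACK_DESTROY - 1);

        if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
                close(fd);
                return 1;
        }

        /* ... recv() loop parsing ctnetlink DESTROY messages ... */

        close(fd);
        return 0;
}

With that option set, an overrun no longer silently drops the DESTROY event: the entry stays on net->ct.dying and the event is retried after a random delay bounded by nf_conntrack_events_retry_timeout.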