diff options
Diffstat (limited to 'net')
-rw-r--r-- | net/netfilter/nf_conntrack_core.c | 89 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_ecache.c | 28 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_netlink.c | 19 |
3 files changed, 118 insertions, 18 deletions
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 14235b144cb5..5f72b94b4918 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c | |||
@@ -183,10 +183,6 @@ destroy_conntrack(struct nf_conntrack *nfct) | |||
183 | NF_CT_ASSERT(atomic_read(&nfct->use) == 0); | 183 | NF_CT_ASSERT(atomic_read(&nfct->use) == 0); |
184 | NF_CT_ASSERT(!timer_pending(&ct->timeout)); | 184 | NF_CT_ASSERT(!timer_pending(&ct->timeout)); |
185 | 185 | ||
186 | if (!test_bit(IPS_DYING_BIT, &ct->status)) | ||
187 | nf_conntrack_event(IPCT_DESTROY, ct); | ||
188 | set_bit(IPS_DYING_BIT, &ct->status); | ||
189 | |||
190 | /* To make sure we don't get any weird locking issues here: | 186 | /* To make sure we don't get any weird locking issues here: |
191 | * destroy_conntrack() MUST NOT be called with a write lock | 187 | * destroy_conntrack() MUST NOT be called with a write lock |
192 | * to nf_conntrack_lock!!! -HW */ | 188 | * to nf_conntrack_lock!!! -HW */ |
@@ -220,9 +216,8 @@ destroy_conntrack(struct nf_conntrack *nfct) | |||
220 | nf_conntrack_free(ct); | 216 | nf_conntrack_free(ct); |
221 | } | 217 | } |
222 | 218 | ||
223 | static void death_by_timeout(unsigned long ul_conntrack) | 219 | void nf_ct_delete_from_lists(struct nf_conn *ct) |
224 | { | 220 | { |
225 | struct nf_conn *ct = (void *)ul_conntrack; | ||
226 | struct net *net = nf_ct_net(ct); | 221 | struct net *net = nf_ct_net(ct); |
227 | 222 | ||
228 | nf_ct_helper_destroy(ct); | 223 | nf_ct_helper_destroy(ct); |
@@ -232,6 +227,59 @@ static void death_by_timeout(unsigned long ul_conntrack) | |||
232 | NF_CT_STAT_INC(net, delete_list); | 227 | NF_CT_STAT_INC(net, delete_list); |
233 | clean_from_lists(ct); | 228 | clean_from_lists(ct); |
234 | spin_unlock_bh(&nf_conntrack_lock); | 229 | spin_unlock_bh(&nf_conntrack_lock); |
230 | } | ||
231 | EXPORT_SYMBOL_GPL(nf_ct_delete_from_lists); | ||
232 | |||
233 | static void death_by_event(unsigned long ul_conntrack) | ||
234 | { | ||
235 | struct nf_conn *ct = (void *)ul_conntrack; | ||
236 | struct net *net = nf_ct_net(ct); | ||
237 | |||
238 | if (nf_conntrack_event(IPCT_DESTROY, ct) < 0) { | ||
239 | /* bad luck, let's retry again */ | ||
240 | ct->timeout.expires = jiffies + | ||
241 | (random32() % net->ct.sysctl_events_retry_timeout); | ||
242 | add_timer(&ct->timeout); | ||
243 | return; | ||
244 | } | ||
245 | /* we've got the event delivered, now it's dying */ | ||
246 | set_bit(IPS_DYING_BIT, &ct->status); | ||
247 | spin_lock(&nf_conntrack_lock); | ||
248 | hlist_nulls_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode); | ||
249 | spin_unlock(&nf_conntrack_lock); | ||
250 | nf_ct_put(ct); | ||
251 | } | ||
252 | |||
253 | void nf_ct_insert_dying_list(struct nf_conn *ct) | ||
254 | { | ||
255 | struct net *net = nf_ct_net(ct); | ||
256 | |||
257 | /* add this conntrack to the dying list */ | ||
258 | spin_lock_bh(&nf_conntrack_lock); | ||
259 | hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, | ||
260 | &net->ct.dying); | ||
261 | spin_unlock_bh(&nf_conntrack_lock); | ||
262 | /* set a new timer to retry event delivery */ | ||
263 | setup_timer(&ct->timeout, death_by_event, (unsigned long)ct); | ||
264 | ct->timeout.expires = jiffies + | ||
265 | (random32() % net->ct.sysctl_events_retry_timeout); | ||
266 | add_timer(&ct->timeout); | ||
267 | } | ||
268 | EXPORT_SYMBOL_GPL(nf_ct_insert_dying_list); | ||
269 | |||
270 | static void death_by_timeout(unsigned long ul_conntrack) | ||
271 | { | ||
272 | struct nf_conn *ct = (void *)ul_conntrack; | ||
273 | |||
274 | if (!test_bit(IPS_DYING_BIT, &ct->status) && | ||
275 | unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) { | ||
276 | /* destroy event was not delivered */ | ||
277 | nf_ct_delete_from_lists(ct); | ||
278 | nf_ct_insert_dying_list(ct); | ||
279 | return; | ||
280 | } | ||
281 | set_bit(IPS_DYING_BIT, &ct->status); | ||
282 | nf_ct_delete_from_lists(ct); | ||
235 | nf_ct_put(ct); | 283 | nf_ct_put(ct); |
236 | } | 284 | } |
237 | 285 | ||
@@ -982,11 +1030,13 @@ static int kill_report(struct nf_conn *i, void *data) | |||
982 | { | 1030 | { |
983 | struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data; | 1031 | struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data; |
984 | 1032 | ||
985 | /* get_next_corpse sets the dying bit for us */ | 1033 | /* If we fail to deliver the event, death_by_timeout() will retry */ |
986 | nf_conntrack_event_report(IPCT_DESTROY, | 1034 | if (nf_conntrack_event_report(IPCT_DESTROY, i, |
987 | i, | 1035 | fr->pid, fr->report) < 0) |
988 | fr->pid, | 1036 | return 1; |
989 | fr->report); | 1037 | |
1038 | /* Avoid the delivery of the destroy event in death_by_timeout(). */ | ||
1039 | set_bit(IPS_DYING_BIT, &i->status); | ||
990 | return 1; | 1040 | return 1; |
991 | } | 1041 | } |
992 | 1042 | ||
@@ -1015,6 +1065,21 @@ void nf_conntrack_flush_report(struct net *net, u32 pid, int report) | |||
1015 | } | 1065 | } |
1016 | EXPORT_SYMBOL_GPL(nf_conntrack_flush_report); | 1066 | EXPORT_SYMBOL_GPL(nf_conntrack_flush_report); |
1017 | 1067 | ||
1068 | static void nf_ct_release_dying_list(void) | ||
1069 | { | ||
1070 | struct nf_conntrack_tuple_hash *h; | ||
1071 | struct nf_conn *ct; | ||
1072 | struct hlist_nulls_node *n; | ||
1073 | |||
1074 | spin_lock_bh(&nf_conntrack_lock); | ||
1075 | hlist_nulls_for_each_entry(h, n, &init_net.ct.dying, hnnode) { | ||
1076 | ct = nf_ct_tuplehash_to_ctrack(h); | ||
1077 | /* never fails to remove them, no listeners at this point */ | ||
1078 | nf_ct_kill(ct); | ||
1079 | } | ||
1080 | spin_unlock_bh(&nf_conntrack_lock); | ||
1081 | } | ||
1082 | |||
1018 | static void nf_conntrack_cleanup_init_net(void) | 1083 | static void nf_conntrack_cleanup_init_net(void) |
1019 | { | 1084 | { |
1020 | nf_conntrack_helper_fini(); | 1085 | nf_conntrack_helper_fini(); |
@@ -1026,6 +1091,7 @@ static void nf_conntrack_cleanup_net(struct net *net) | |||
1026 | { | 1091 | { |
1027 | i_see_dead_people: | 1092 | i_see_dead_people: |
1028 | nf_ct_iterate_cleanup(net, kill_all, NULL); | 1093 | nf_ct_iterate_cleanup(net, kill_all, NULL); |
1094 | nf_ct_release_dying_list(); | ||
1029 | if (atomic_read(&net->ct.count) != 0) { | 1095 | if (atomic_read(&net->ct.count) != 0) { |
1030 | schedule(); | 1096 | schedule(); |
1031 | goto i_see_dead_people; | 1097 | goto i_see_dead_people; |
@@ -1207,6 +1273,7 @@ static int nf_conntrack_init_net(struct net *net) | |||
1207 | 1273 | ||
1208 | atomic_set(&net->ct.count, 0); | 1274 | atomic_set(&net->ct.count, 0); |
1209 | INIT_HLIST_NULLS_HEAD(&net->ct.unconfirmed, 0); | 1275 | INIT_HLIST_NULLS_HEAD(&net->ct.unconfirmed, 0); |
1276 | INIT_HLIST_NULLS_HEAD(&net->ct.dying, 0); | ||
1210 | net->ct.stat = alloc_percpu(struct ip_conntrack_stat); | 1277 | net->ct.stat = alloc_percpu(struct ip_conntrack_stat); |
1211 | if (!net->ct.stat) { | 1278 | if (!net->ct.stat) { |
1212 | ret = -ENOMEM; | 1279 | ret = -ENOMEM; |
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index 683281b78047..aee560b4768d 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c | |||
@@ -56,8 +56,21 @@ void nf_ct_deliver_cached_events(struct nf_conn *ct) | |||
56 | .pid = 0, | 56 | .pid = 0, |
57 | .report = 0 | 57 | .report = 0 |
58 | }; | 58 | }; |
59 | 59 | int ret; | |
60 | notify->fcn(events, &item); | 60 | /* We make a copy of the missed event cache without taking |
61 | * the lock, thus we may send missed events twice. However, | ||
62 | * this does not harm and it happens very rarely. */ | ||
63 | unsigned long missed = e->missed; | ||
64 | |||
65 | ret = notify->fcn(events | missed, &item); | ||
66 | if (unlikely(ret < 0 || missed)) { | ||
67 | spin_lock_bh(&ct->lock); | ||
68 | if (ret < 0) | ||
69 | e->missed |= events; | ||
70 | else | ||
71 | e->missed &= ~missed; | ||
72 | spin_unlock_bh(&ct->lock); | ||
73 | } | ||
61 | } | 74 | } |
62 | 75 | ||
63 | out_unlock: | 76 | out_unlock: |
@@ -133,6 +146,7 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier); | |||
133 | 146 | ||
134 | #define NF_CT_EVENTS_DEFAULT 1 | 147 | #define NF_CT_EVENTS_DEFAULT 1 |
135 | static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT; | 148 | static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT; |
149 | static int nf_ct_events_retry_timeout __read_mostly = 15*HZ; | ||
136 | 150 | ||
137 | #ifdef CONFIG_SYSCTL | 151 | #ifdef CONFIG_SYSCTL |
138 | static struct ctl_table event_sysctl_table[] = { | 152 | static struct ctl_table event_sysctl_table[] = { |
@@ -144,6 +158,14 @@ static struct ctl_table event_sysctl_table[] = { | |||
144 | .mode = 0644, | 158 | .mode = 0644, |
145 | .proc_handler = proc_dointvec, | 159 | .proc_handler = proc_dointvec, |
146 | }, | 160 | }, |
161 | { | ||
162 | .ctl_name = CTL_UNNUMBERED, | ||
163 | .procname = "nf_conntrack_events_retry_timeout", | ||
164 | .data = &init_net.ct.sysctl_events_retry_timeout, | ||
165 | .maxlen = sizeof(unsigned int), | ||
166 | .mode = 0644, | ||
167 | .proc_handler = proc_dointvec_jiffies, | ||
168 | }, | ||
147 | {} | 169 | {} |
148 | }; | 170 | }; |
149 | #endif /* CONFIG_SYSCTL */ | 171 | #endif /* CONFIG_SYSCTL */ |
@@ -165,6 +187,7 @@ static int nf_conntrack_event_init_sysctl(struct net *net) | |||
165 | goto out; | 187 | goto out; |
166 | 188 | ||
167 | table[0].data = &net->ct.sysctl_events; | 189 | table[0].data = &net->ct.sysctl_events; |
190 | table[1].data = &net->ct.sysctl_events_retry_timeout; | ||
168 | 191 | ||
169 | net->ct.event_sysctl_header = | 192 | net->ct.event_sysctl_header = |
170 | register_net_sysctl_table(net, | 193 | register_net_sysctl_table(net, |
@@ -205,6 +228,7 @@ int nf_conntrack_ecache_init(struct net *net) | |||
205 | int ret; | 228 | int ret; |
206 | 229 | ||
207 | net->ct.sysctl_events = nf_ct_events; | 230 | net->ct.sysctl_events = nf_ct_events; |
231 | net->ct.sysctl_events_retry_timeout = nf_ct_events_retry_timeout; | ||
208 | 232 | ||
209 | if (net_eq(net, &init_net)) { | 233 | if (net_eq(net, &init_net)) { |
210 | ret = nf_ct_extend_register(&event_extend); | 234 | ret = nf_ct_extend_register(&event_extend); |
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 19706eff1647..49479d194570 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c | |||
@@ -463,6 +463,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) | |||
463 | struct sk_buff *skb; | 463 | struct sk_buff *skb; |
464 | unsigned int type; | 464 | unsigned int type; |
465 | unsigned int flags = 0, group; | 465 | unsigned int flags = 0, group; |
466 | int err; | ||
466 | 467 | ||
467 | /* ignore our fake conntrack entry */ | 468 | /* ignore our fake conntrack entry */ |
468 | if (ct == &nf_conntrack_untracked) | 469 | if (ct == &nf_conntrack_untracked) |
@@ -558,7 +559,10 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) | |||
558 | rcu_read_unlock(); | 559 | rcu_read_unlock(); |
559 | 560 | ||
560 | nlmsg_end(skb, nlh); | 561 | nlmsg_end(skb, nlh); |
561 | nfnetlink_send(skb, item->pid, group, item->report, GFP_ATOMIC); | 562 | err = nfnetlink_send(skb, item->pid, group, item->report, GFP_ATOMIC); |
563 | if (err == -ENOBUFS || err == -EAGAIN) | ||
564 | return -ENOBUFS; | ||
565 | |||
562 | return 0; | 566 | return 0; |
563 | 567 | ||
564 | nla_put_failure: | 568 | nla_put_failure: |
@@ -798,10 +802,15 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, | |||
798 | } | 802 | } |
799 | } | 803 | } |
800 | 804 | ||
801 | nf_conntrack_event_report(IPCT_DESTROY, | 805 | if (nf_conntrack_event_report(IPCT_DESTROY, ct, |
802 | ct, | 806 | NETLINK_CB(skb).pid, |
803 | NETLINK_CB(skb).pid, | 807 | nlmsg_report(nlh)) < 0) { |
804 | nlmsg_report(nlh)); | 808 | nf_ct_delete_from_lists(ct); |
809 | /* we failed to report the event, try later */ | ||
810 | nf_ct_insert_dying_list(ct); | ||
811 | nf_ct_put(ct); | ||
812 | return 0; | ||
813 | } | ||
805 | 814 | ||
806 | /* death_by_timeout would report the event again */ | 815 | /* death_by_timeout would report the event again */ |
807 | set_bit(IPS_DYING_BIT, &ct->status); | 816 | set_bit(IPS_DYING_BIT, &ct->status); |