diff options
author | Pablo Neira Ayuso <pablo@netfilter.org> | 2009-06-13 06:30:52 -0400 |
---|---|---|
committer | Patrick McHardy <kaber@trash.net> | 2009-06-13 06:30:52 -0400 |
commit | dd7669a92c6066b2b31bae7e04cd787092920883 (patch) | |
tree | d06a9e18aec99c5a34a191cb3391e74ba8a8ec59 /net | |
parent | d219dce76c64f2c883dad0537fa09a56d5ff0a10 (diff) |
netfilter: conntrack: optional reliable conntrack event delivery
This patch improves ctnetlink event reliability if one broadcast
listener has set the NETLINK_BROADCAST_ERROR socket option.
The logic is the following: if an event delivery fails, we keep
the undelivered events in the missed event cache. Once the next
packet arrives, we add the new events (if any) to the missed
events in the cache and we try a new delivery, and so on. Thus,
if ctnetlink fails to deliver an event, we try to deliver them
once we see a new packet. Therefore, we may lose state
transitions but the userspace process gets in sync at some point.
In the worst case, if no events were delivered to userspace, we make
sure that destroy events are successfully delivered. Basically,
if ctnetlink fails to deliver the destroy event, we remove the
conntrack entry from the hashes and we insert it in the dying
list, which contains inactive entries. Then, the conntrack timer
is added with an extra grace timeout of random32() % 15 seconds
to trigger the event again (this grace timeout is tunable via
/proc). The use of a limited random timeout value allows
distributing the "destroy" resends, thus, avoiding accumulating
lots of "destroy" events at the same time. Event delivery may
re-order but we can identify them by means of the tuple plus
the conntrack ID.
The maximum number of conntrack entries (active or inactive) is
still handled by nf_conntrack_max. Thus, we may start dropping
packets at some point if we accumulate a lot of inactive conntrack
entries that did not successfully report the destroy event to
userspace.
During my stress tests consisting of setting a very small buffer
of 2048 bytes for conntrackd and the NETLINK_BROADCAST_ERROR socket
flag, and generating lots of very small connections, I noticed
very few destroy entries on the fly waiting to be resent.
A simple way to test this patch consists of creating a lot of
entries, set a very small Netlink buffer in conntrackd (+ a patch
which is not in the git tree to set the BROADCAST_ERROR flag)
and invoke `conntrack -F'.
For expectations, no changes are introduced in this patch.
Currently, event delivery is only done for new expectations (no
events from expectation expiration, removal and confirmation).
In that case, they need a per-expectation event cache to implement
the same idea that is exposed in this patch.
This patch can be useful to provide reliable flow-accounting. We
still have to add a new conntrack extension to store the creation
and destroy time.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/netfilter/nf_conntrack_core.c | 89 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_ecache.c | 28 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_netlink.c | 19 |
3 files changed, 118 insertions, 18 deletions
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 14235b144cb5..5f72b94b4918 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c | |||
@@ -183,10 +183,6 @@ destroy_conntrack(struct nf_conntrack *nfct) | |||
183 | NF_CT_ASSERT(atomic_read(&nfct->use) == 0); | 183 | NF_CT_ASSERT(atomic_read(&nfct->use) == 0); |
184 | NF_CT_ASSERT(!timer_pending(&ct->timeout)); | 184 | NF_CT_ASSERT(!timer_pending(&ct->timeout)); |
185 | 185 | ||
186 | if (!test_bit(IPS_DYING_BIT, &ct->status)) | ||
187 | nf_conntrack_event(IPCT_DESTROY, ct); | ||
188 | set_bit(IPS_DYING_BIT, &ct->status); | ||
189 | |||
190 | /* To make sure we don't get any weird locking issues here: | 186 | /* To make sure we don't get any weird locking issues here: |
191 | * destroy_conntrack() MUST NOT be called with a write lock | 187 | * destroy_conntrack() MUST NOT be called with a write lock |
192 | * to nf_conntrack_lock!!! -HW */ | 188 | * to nf_conntrack_lock!!! -HW */ |
@@ -220,9 +216,8 @@ destroy_conntrack(struct nf_conntrack *nfct) | |||
220 | nf_conntrack_free(ct); | 216 | nf_conntrack_free(ct); |
221 | } | 217 | } |
222 | 218 | ||
223 | static void death_by_timeout(unsigned long ul_conntrack) | 219 | void nf_ct_delete_from_lists(struct nf_conn *ct) |
224 | { | 220 | { |
225 | struct nf_conn *ct = (void *)ul_conntrack; | ||
226 | struct net *net = nf_ct_net(ct); | 221 | struct net *net = nf_ct_net(ct); |
227 | 222 | ||
228 | nf_ct_helper_destroy(ct); | 223 | nf_ct_helper_destroy(ct); |
@@ -232,6 +227,59 @@ static void death_by_timeout(unsigned long ul_conntrack) | |||
232 | NF_CT_STAT_INC(net, delete_list); | 227 | NF_CT_STAT_INC(net, delete_list); |
233 | clean_from_lists(ct); | 228 | clean_from_lists(ct); |
234 | spin_unlock_bh(&nf_conntrack_lock); | 229 | spin_unlock_bh(&nf_conntrack_lock); |
230 | } | ||
231 | EXPORT_SYMBOL_GPL(nf_ct_delete_from_lists); | ||
232 | |||
233 | static void death_by_event(unsigned long ul_conntrack) | ||
234 | { | ||
235 | struct nf_conn *ct = (void *)ul_conntrack; | ||
236 | struct net *net = nf_ct_net(ct); | ||
237 | |||
238 | if (nf_conntrack_event(IPCT_DESTROY, ct) < 0) { | ||
239 | /* bad luck, let's retry again */ | ||
240 | ct->timeout.expires = jiffies + | ||
241 | (random32() % net->ct.sysctl_events_retry_timeout); | ||
242 | add_timer(&ct->timeout); | ||
243 | return; | ||
244 | } | ||
245 | /* we've got the event delivered, now it's dying */ | ||
246 | set_bit(IPS_DYING_BIT, &ct->status); | ||
247 | spin_lock(&nf_conntrack_lock); | ||
248 | hlist_nulls_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode); | ||
249 | spin_unlock(&nf_conntrack_lock); | ||
250 | nf_ct_put(ct); | ||
251 | } | ||
252 | |||
253 | void nf_ct_insert_dying_list(struct nf_conn *ct) | ||
254 | { | ||
255 | struct net *net = nf_ct_net(ct); | ||
256 | |||
257 | /* add this conntrack to the dying list */ | ||
258 | spin_lock_bh(&nf_conntrack_lock); | ||
259 | hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, | ||
260 | &net->ct.dying); | ||
261 | spin_unlock_bh(&nf_conntrack_lock); | ||
262 | /* set a new timer to retry event delivery */ | ||
263 | setup_timer(&ct->timeout, death_by_event, (unsigned long)ct); | ||
264 | ct->timeout.expires = jiffies + | ||
265 | (random32() % net->ct.sysctl_events_retry_timeout); | ||
266 | add_timer(&ct->timeout); | ||
267 | } | ||
268 | EXPORT_SYMBOL_GPL(nf_ct_insert_dying_list); | ||
269 | |||
270 | static void death_by_timeout(unsigned long ul_conntrack) | ||
271 | { | ||
272 | struct nf_conn *ct = (void *)ul_conntrack; | ||
273 | |||
274 | if (!test_bit(IPS_DYING_BIT, &ct->status) && | ||
275 | unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) { | ||
276 | /* destroy event was not delivered */ | ||
277 | nf_ct_delete_from_lists(ct); | ||
278 | nf_ct_insert_dying_list(ct); | ||
279 | return; | ||
280 | } | ||
281 | set_bit(IPS_DYING_BIT, &ct->status); | ||
282 | nf_ct_delete_from_lists(ct); | ||
235 | nf_ct_put(ct); | 283 | nf_ct_put(ct); |
236 | } | 284 | } |
237 | 285 | ||
@@ -982,11 +1030,13 @@ static int kill_report(struct nf_conn *i, void *data) | |||
982 | { | 1030 | { |
983 | struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data; | 1031 | struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data; |
984 | 1032 | ||
985 | /* get_next_corpse sets the dying bit for us */ | 1033 | /* If we fail to deliver the event, death_by_timeout() will retry */ |
986 | nf_conntrack_event_report(IPCT_DESTROY, | 1034 | if (nf_conntrack_event_report(IPCT_DESTROY, i, |
987 | i, | 1035 | fr->pid, fr->report) < 0) |
988 | fr->pid, | 1036 | return 1; |
989 | fr->report); | 1037 | |
1038 | /* Avoid the delivery of the destroy event in death_by_timeout(). */ | ||
1039 | set_bit(IPS_DYING_BIT, &i->status); | ||
990 | return 1; | 1040 | return 1; |
991 | } | 1041 | } |
992 | 1042 | ||
@@ -1015,6 +1065,21 @@ void nf_conntrack_flush_report(struct net *net, u32 pid, int report) | |||
1015 | } | 1065 | } |
1016 | EXPORT_SYMBOL_GPL(nf_conntrack_flush_report); | 1066 | EXPORT_SYMBOL_GPL(nf_conntrack_flush_report); |
1017 | 1067 | ||
1068 | static void nf_ct_release_dying_list(void) | ||
1069 | { | ||
1070 | struct nf_conntrack_tuple_hash *h; | ||
1071 | struct nf_conn *ct; | ||
1072 | struct hlist_nulls_node *n; | ||
1073 | |||
1074 | spin_lock_bh(&nf_conntrack_lock); | ||
1075 | hlist_nulls_for_each_entry(h, n, &init_net.ct.dying, hnnode) { | ||
1076 | ct = nf_ct_tuplehash_to_ctrack(h); | ||
1077 | /* never fails to remove them, no listeners at this point */ | ||
1078 | nf_ct_kill(ct); | ||
1079 | } | ||
1080 | spin_unlock_bh(&nf_conntrack_lock); | ||
1081 | } | ||
1082 | |||
1018 | static void nf_conntrack_cleanup_init_net(void) | 1083 | static void nf_conntrack_cleanup_init_net(void) |
1019 | { | 1084 | { |
1020 | nf_conntrack_helper_fini(); | 1085 | nf_conntrack_helper_fini(); |
@@ -1026,6 +1091,7 @@ static void nf_conntrack_cleanup_net(struct net *net) | |||
1026 | { | 1091 | { |
1027 | i_see_dead_people: | 1092 | i_see_dead_people: |
1028 | nf_ct_iterate_cleanup(net, kill_all, NULL); | 1093 | nf_ct_iterate_cleanup(net, kill_all, NULL); |
1094 | nf_ct_release_dying_list(); | ||
1029 | if (atomic_read(&net->ct.count) != 0) { | 1095 | if (atomic_read(&net->ct.count) != 0) { |
1030 | schedule(); | 1096 | schedule(); |
1031 | goto i_see_dead_people; | 1097 | goto i_see_dead_people; |
@@ -1207,6 +1273,7 @@ static int nf_conntrack_init_net(struct net *net) | |||
1207 | 1273 | ||
1208 | atomic_set(&net->ct.count, 0); | 1274 | atomic_set(&net->ct.count, 0); |
1209 | INIT_HLIST_NULLS_HEAD(&net->ct.unconfirmed, 0); | 1275 | INIT_HLIST_NULLS_HEAD(&net->ct.unconfirmed, 0); |
1276 | INIT_HLIST_NULLS_HEAD(&net->ct.dying, 0); | ||
1210 | net->ct.stat = alloc_percpu(struct ip_conntrack_stat); | 1277 | net->ct.stat = alloc_percpu(struct ip_conntrack_stat); |
1211 | if (!net->ct.stat) { | 1278 | if (!net->ct.stat) { |
1212 | ret = -ENOMEM; | 1279 | ret = -ENOMEM; |
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index 683281b78047..aee560b4768d 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c | |||
@@ -56,8 +56,21 @@ void nf_ct_deliver_cached_events(struct nf_conn *ct) | |||
56 | .pid = 0, | 56 | .pid = 0, |
57 | .report = 0 | 57 | .report = 0 |
58 | }; | 58 | }; |
59 | 59 | int ret; | |
60 | notify->fcn(events, &item); | 60 | /* We make a copy of the missed event cache without taking |
61 | * the lock, thus we may send missed events twice. However, | ||
62 | * this does not harm and it happens very rarely. */ | ||
63 | unsigned long missed = e->missed; | ||
64 | |||
65 | ret = notify->fcn(events | missed, &item); | ||
66 | if (unlikely(ret < 0 || missed)) { | ||
67 | spin_lock_bh(&ct->lock); | ||
68 | if (ret < 0) | ||
69 | e->missed |= events; | ||
70 | else | ||
71 | e->missed &= ~missed; | ||
72 | spin_unlock_bh(&ct->lock); | ||
73 | } | ||
61 | } | 74 | } |
62 | 75 | ||
63 | out_unlock: | 76 | out_unlock: |
@@ -133,6 +146,7 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier); | |||
133 | 146 | ||
134 | #define NF_CT_EVENTS_DEFAULT 1 | 147 | #define NF_CT_EVENTS_DEFAULT 1 |
135 | static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT; | 148 | static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT; |
149 | static int nf_ct_events_retry_timeout __read_mostly = 15*HZ; | ||
136 | 150 | ||
137 | #ifdef CONFIG_SYSCTL | 151 | #ifdef CONFIG_SYSCTL |
138 | static struct ctl_table event_sysctl_table[] = { | 152 | static struct ctl_table event_sysctl_table[] = { |
@@ -144,6 +158,14 @@ static struct ctl_table event_sysctl_table[] = { | |||
144 | .mode = 0644, | 158 | .mode = 0644, |
145 | .proc_handler = proc_dointvec, | 159 | .proc_handler = proc_dointvec, |
146 | }, | 160 | }, |
161 | { | ||
162 | .ctl_name = CTL_UNNUMBERED, | ||
163 | .procname = "nf_conntrack_events_retry_timeout", | ||
164 | .data = &init_net.ct.sysctl_events_retry_timeout, | ||
165 | .maxlen = sizeof(unsigned int), | ||
166 | .mode = 0644, | ||
167 | .proc_handler = proc_dointvec_jiffies, | ||
168 | }, | ||
147 | {} | 169 | {} |
148 | }; | 170 | }; |
149 | #endif /* CONFIG_SYSCTL */ | 171 | #endif /* CONFIG_SYSCTL */ |
@@ -165,6 +187,7 @@ static int nf_conntrack_event_init_sysctl(struct net *net) | |||
165 | goto out; | 187 | goto out; |
166 | 188 | ||
167 | table[0].data = &net->ct.sysctl_events; | 189 | table[0].data = &net->ct.sysctl_events; |
190 | table[1].data = &net->ct.sysctl_events_retry_timeout; | ||
168 | 191 | ||
169 | net->ct.event_sysctl_header = | 192 | net->ct.event_sysctl_header = |
170 | register_net_sysctl_table(net, | 193 | register_net_sysctl_table(net, |
@@ -205,6 +228,7 @@ int nf_conntrack_ecache_init(struct net *net) | |||
205 | int ret; | 228 | int ret; |
206 | 229 | ||
207 | net->ct.sysctl_events = nf_ct_events; | 230 | net->ct.sysctl_events = nf_ct_events; |
231 | net->ct.sysctl_events_retry_timeout = nf_ct_events_retry_timeout; | ||
208 | 232 | ||
209 | if (net_eq(net, &init_net)) { | 233 | if (net_eq(net, &init_net)) { |
210 | ret = nf_ct_extend_register(&event_extend); | 234 | ret = nf_ct_extend_register(&event_extend); |
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 19706eff1647..49479d194570 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c | |||
@@ -463,6 +463,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) | |||
463 | struct sk_buff *skb; | 463 | struct sk_buff *skb; |
464 | unsigned int type; | 464 | unsigned int type; |
465 | unsigned int flags = 0, group; | 465 | unsigned int flags = 0, group; |
466 | int err; | ||
466 | 467 | ||
467 | /* ignore our fake conntrack entry */ | 468 | /* ignore our fake conntrack entry */ |
468 | if (ct == &nf_conntrack_untracked) | 469 | if (ct == &nf_conntrack_untracked) |
@@ -558,7 +559,10 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) | |||
558 | rcu_read_unlock(); | 559 | rcu_read_unlock(); |
559 | 560 | ||
560 | nlmsg_end(skb, nlh); | 561 | nlmsg_end(skb, nlh); |
561 | nfnetlink_send(skb, item->pid, group, item->report, GFP_ATOMIC); | 562 | err = nfnetlink_send(skb, item->pid, group, item->report, GFP_ATOMIC); |
563 | if (err == -ENOBUFS || err == -EAGAIN) | ||
564 | return -ENOBUFS; | ||
565 | |||
562 | return 0; | 566 | return 0; |
563 | 567 | ||
564 | nla_put_failure: | 568 | nla_put_failure: |
@@ -798,10 +802,15 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, | |||
798 | } | 802 | } |
799 | } | 803 | } |
800 | 804 | ||
801 | nf_conntrack_event_report(IPCT_DESTROY, | 805 | if (nf_conntrack_event_report(IPCT_DESTROY, ct, |
802 | ct, | 806 | NETLINK_CB(skb).pid, |
803 | NETLINK_CB(skb).pid, | 807 | nlmsg_report(nlh)) < 0) { |
804 | nlmsg_report(nlh)); | 808 | nf_ct_delete_from_lists(ct); |
809 | /* we failed to report the event, try later */ | ||
810 | nf_ct_insert_dying_list(ct); | ||
811 | nf_ct_put(ct); | ||
812 | return 0; | ||
813 | } | ||
805 | 814 | ||
806 | /* death_by_timeout would report the event again */ | 815 | /* death_by_timeout would report the event again */ |
807 | set_bit(IPS_DYING_BIT, &ct->status); | 816 | set_bit(IPS_DYING_BIT, &ct->status); |