From 55432d2b543a4b6dfae54f5c432a566877a85d90 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 5 Jun 2012 03:00:18 +0000
Subject: inetpeer: fix a race in inetpeer_gc_worker()

commit 5faa5df1fa2024 (inetpeer: Invalidate the inetpeer tree along with
the routing cache) added a race :

Before freeing an inetpeer, we must respect a RCU grace period, and make
sure no user will attempt to increase refcnt.

inetpeer_invalidate_tree() waits for a RCU grace period before inserting
inetpeer tree into gc_list and waking the worker. At that time, no
concurrent lookup can find a inetpeer in this tree.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Acked-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inetpeer.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index b94765e38e80..2040bff945d4 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -40,7 +40,10 @@ struct inet_peer {
 	u32			pmtu_orig;
 	u32			pmtu_learned;
 	struct inetpeer_addr_base redirect_learned;
-	struct list_head	gc_list;
+	union {
+		struct list_head	gc_list;
+		struct rcu_head     gc_rcu;
+	};
 	/*
 	 * Once inet_peer is queued for deletion (refcnt == -1), following fields
 	 * are not available: rid, ip_id_count, tcp_ts, tcp_ts_stamp
-- 
cgit v1.2.2


From 1c2e004183178e1947882cd2e74f37826f45230e Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Fri, 8 Jun 2012 23:31:13 +0800
Subject: Bluetooth: Add support for encryption key refresh

With LE/SMP the completion of a security level elavation from medium to
high is indicated by a HCI Encryption Key Refresh Complete event. The
necessary behavior upon receiving this event is a mix of what's done for
auth_complete and encryption_change, which is also where most of the
event handling code has been copied from.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 include/net/bluetooth/hci.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 66a7b579e31c..3def64ba77fa 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1144,6 +1144,12 @@ struct extended_inquiry_info {
 	__u8     data[240];
 } __packed;
 
+#define HCI_EV_KEY_REFRESH_COMPLETE	0x30
+struct hci_ev_key_refresh_complete {
+	__u8	status;
+	__le16	handle;
+} __packed;
+
 #define HCI_EV_IO_CAPA_REQUEST		0x31
 struct hci_ev_io_capa_request {
 	bdaddr_t bdaddr;
-- 
cgit v1.2.2


From d13e14148154e5ce58467e76321eef1dd912c416 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Sat, 9 Jun 2012 10:31:09 +0200
Subject: mac80211: add some missing kernel-doc

Add a few kernel-doc descriptions that were missed
during development.

Reported-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 1937c7d98304..95e39b6a02ec 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1940,6 +1940,11 @@ enum ieee80211_rate_control_changed {
  *	to also unregister the device. If it returns 1, then mac80211
  *	will also go through the regular complete restart on resume.
  *
+ * @set_wakeup: Enable or disable wakeup when WoWLAN configuration is
+ *	modified. The reason is that device_set_wakeup_enable() is
+ *	supposed to be called when the configuration changes, not only
+ *	in suspend().
+ *
  * @add_interface: Called when a netdevice attached to the hardware is
  *	enabled. Because it is not called for monitor mode devices, @start
  *	and @stop must be implemented.
@@ -2966,6 +2971,7 @@ __le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw,
  * ieee80211_generic_frame_duration - Calculate the duration field for a frame
  * @hw: pointer obtained from ieee80211_alloc_hw().
  * @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ * @band: the band to calculate the frame duration on
  * @frame_len: the length of the frame.
  * @rate: the rate at which the frame is going to be transmitted.
  *
-- 
cgit v1.2.2


From c5d21c4b2a7765ab0600c8426374b50eb9f4a36f Mon Sep 17 00:00:00 2001
From: Roland Dreier <roland@purestorage.com>
Date: Sun, 10 Jun 2012 20:05:24 +0000
Subject: net: Reorder initialization in ip_route_output to fix gcc warning

If I build with W=1, for every file that includes <net/route.h>, I get the warning

    include/net/route.h: In function 'ip_route_output':
    include/net/route.h:135:3: warning: initialized field overwritten [-Woverride-init]
    include/net/route.h:135:3: warning: (near initialization for 'fl4') [-Woverride-init]

(This is with "gcc (Debian 4.6.3-1) 4.6.3")

A fix seems pretty trivial: move the initialization of .flowi4_tos
earlier.  As far as I can tell, this has no effect on code generation.

Signed-off-by: Roland Dreier <roland@purestorage.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/route.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/route.h b/include/net/route.h
index ed2b78e2375d..98705468ac03 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -130,9 +130,9 @@ static inline struct rtable *ip_route_output(struct net *net, __be32 daddr,
 {
 	struct flowi4 fl4 = {
 		.flowi4_oif = oif,
+		.flowi4_tos = tos,
 		.daddr = daddr,
 		.saddr = saddr,
-		.flowi4_tos = tos,
 	};
 	return ip_route_output_key(net, &fl4);
 }
-- 
cgit v1.2.2


From 5ee31c6898ea5537fcea160999d60dc63bc0c305 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 12 Jun 2012 06:03:51 +0000
Subject: bonding: Fix corrupted queue_mapping

In the transmit path of the bonding driver, skb->cb is used to
stash the skb->queue_mapping so that the bonding device can set its
own queue mapping.  This value becomes corrupted since the skb->cb is
also used in __dev_xmit_skb.

When transmitting through bonding driver, bond_select_queue is
called from dev_queue_xmit.  In bond_select_queue the original
skb->queue_mapping is copied into skb->cb (via bond_queue_mapping)
and skb->queue_mapping is overwritten with the bond driver queue.

Subsequently in dev_queue_xmit, __dev_xmit_skb is called which writes
the packet length into skb->cb, thereby overwriting the stashed
queue mappping.  In bond_dev_queue_xmit (called from hard_start_xmit),
the queue mapping for the skb is set to the stashed value which is now
the skb length and hence is an invalid queue for the slave device.

If we want to save skb->queue_mapping into skb->cb[], best place is to
add a field in struct qdisc_skb_cb, to make sure it wont conflict with
other layers (eg : Qdiscc, Infiniband...)

This patchs also makes sure (struct qdisc_skb_cb)->data is aligned on 8
bytes :

netem qdisc for example assumes it can store an u64 in it, without
misalignment penalty.

Note : we only have 20 bytes left in (struct qdisc_skb_cb)->data[].
The largest user is CHOKe and it fills it.

Based on a previous patch from Tom Herbert.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Tom Herbert <therbert@google.com>
Cc: John Fastabend <john.r.fastabend@intel.com>
Cc: Roland Dreier <roland@kernel.org>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 55ce96b53b09..9d7d54a00e63 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -220,13 +220,16 @@ struct tcf_proto {
 
 struct qdisc_skb_cb {
 	unsigned int		pkt_len;
-	unsigned char		data[24];
+	u16			bond_queue_mapping;
+	u16			_pad;
+	unsigned char		data[20];
 };
 
 static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz)
 {
 	struct qdisc_skb_cb *qcb;
-	BUILD_BUG_ON(sizeof(skb->cb) < sizeof(unsigned int) + sz);
+
+	BUILD_BUG_ON(sizeof(skb->cb) < offsetof(struct qdisc_skb_cb, data) + sz);
 	BUILD_BUG_ON(sizeof(qcb->data) < sz);
 }
 
-- 
cgit v1.2.2


From 2a0c451ade8e1783c5d453948289e4a978d417c9 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Thu, 14 Jun 2012 23:00:17 +0000
Subject: ipv6: Prevent access to uninitialized fib_table_hash via
 /proc/net/ipv6_route

/proc/net/ipv6_route reflects the contents of fib_table_hash. The proc
handler is installed in ip6_route_net_init() whereas fib_table_hash is
allocated in fib6_net_init() _after_ the proc handler has been installed.

This opens up a short time frame to access fib_table_hash with its pants
down.

fib6_init() as a whole can't be moved to an earlier position as it also
registers the rtnetlink message handlers which should be registered at
the end. Therefore split it into fib6_init() which is run early and
fib6_init_late() to register the rtnetlink message handlers.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Reviewed-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_fib.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/net')

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 0ae759a6c76e..209af13b0336 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -271,6 +271,8 @@ extern void			fib6_run_gc(unsigned long expires,
 extern void			fib6_gc_cleanup(void);
 
 extern int			fib6_init(void);
+extern int			fib6_init_late(void);
+extern void			fib6_cleanup_late(void);
 
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 extern int			fib6_rules_init(void);
-- 
cgit v1.2.2


From e8803b6c387129059e04d9e14d49efda250a7361 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sat, 16 Jun 2012 01:12:19 -0700
Subject: Revert "ipv6: Prevent access to uninitialized fib_table_hash via
 /proc/net/ipv6_route"

This reverts commit 2a0c451ade8e1783c5d453948289e4a978d417c9.

It causes crashes, because now ip6_null_entry is used before
it is initialized.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_fib.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 209af13b0336..0ae759a6c76e 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -271,8 +271,6 @@ extern void			fib6_run_gc(unsigned long expires,
 extern void			fib6_gc_cleanup(void);
 
 extern int			fib6_init(void);
-extern int			fib6_init_late(void);
-extern void			fib6_cleanup_late(void);
 
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 extern int			fib6_rules_init(void);
-- 
cgit v1.2.2


From 31fdc5553b42abd7e29bb7b89f6ba07514eb4763 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= <remi.denis-courmont@nokia.com>
Date: Wed, 13 Jun 2012 22:29:03 +0000
Subject: net: remove my future former mail address
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Rémi Denis-Courmont <remi@remlab.net>
Cc: Sakari Ailus <sakari.ailus@nokia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/phonet/gprs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/phonet/gprs.h b/include/net/phonet/gprs.h
index 928daf595beb..bcd525e39a0b 100644
--- a/include/net/phonet/gprs.h
+++ b/include/net/phonet/gprs.h
@@ -5,7 +5,7 @@
  *
  * Copyright (C) 2008 Nokia Corporation.
  *
- * Author: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>
+ * Author: Rémi Denis-Courmont
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
-- 
cgit v1.2.2


From 4244854d22bf8f782698c5224b9191c8d2d42610 Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Sat, 30 Jun 2012 03:04:26 +0000
Subject: sctp: be more restrictive in transport selection on bundled sacks

It was noticed recently that when we send data on a transport, its possible that
we might bundle a sack that arrived on a different transport.  While this isn't
a major problem, it does go against the SHOULD requirement in section 6.4 of RFC
2960:

 An endpoint SHOULD transmit reply chunks (e.g., SACK, HEARTBEAT ACK,
   etc.) to the same destination transport address from which it
   received the DATA or control chunk to which it is replying.  This
   rule should also be followed if the endpoint is bundling DATA chunks
   together with the reply chunk.

This patch seeks to correct that.  It restricts the bundling of sack operations
to only those transports which have moved the ctsn of the association forward
since the last sack.  By doing this we guarantee that we only bundle outbound
saks on a transport that has received a chunk since the last sack.  This brings
us into stricter compliance with the RFC.

Vlad had initially suggested that we strictly allow only sack bundling on the
transport that last moved the ctsn forward.  While this makes sense, I was
concerned that doing so prevented us from bundling in the case where we had
received chunks that moved the ctsn on multiple transports.  In those cases, the
RFC allows us to select any of the transports having received chunks to bundle
the sack on.  so I've modified the approach to allow for that, by adding a state
variable to each transport that tracks weather it has moved the ctsn since the
last sack.  This I think keeps our behavior (and performance), close enough to
our current profile that I think we can do this without a sysctl knob to
enable/disable it.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
CC: Vlad Yaseivch <vyasevich@gmail.com>
CC: David S. Miller <davem@davemloft.net>
CC: linux-sctp@vger.kernel.org
Reported-by: Michele Baldessari <michele@redhat.com>
Reported-by: sorin serban <sserban@redhat.com>
Acked-by: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h | 4 ++++
 include/net/sctp/tsnmap.h  | 3 ++-
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index e4652fe58958..fecdf31816f2 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -912,6 +912,9 @@ struct sctp_transport {
 		/* Is this structure kfree()able? */
 		malloced:1;
 
+	/* Has this transport moved the ctsn since we last sacked */
+	__u32 sack_generation;
+
 	struct flowi fl;
 
 	/* This is the peer's IP address and port. */
@@ -1584,6 +1587,7 @@ struct sctp_association {
 		 */
 		__u8    sack_needed;     /* Do we need to sack the peer? */
 		__u32	sack_cnt;
+		__u32	sack_generation;
 
 		/* These are capabilities which our peer advertised.  */
 		__u8	ecn_capable:1,	    /* Can peer do ECN? */
diff --git a/include/net/sctp/tsnmap.h b/include/net/sctp/tsnmap.h
index e7728bc14ccf..2c5d2b4d5d1e 100644
--- a/include/net/sctp/tsnmap.h
+++ b/include/net/sctp/tsnmap.h
@@ -117,7 +117,8 @@ void sctp_tsnmap_free(struct sctp_tsnmap *map);
 int sctp_tsnmap_check(const struct sctp_tsnmap *, __u32 tsn);
 
 /* Mark this TSN as seen.  */
-int sctp_tsnmap_mark(struct sctp_tsnmap *, __u32 tsn);
+int sctp_tsnmap_mark(struct sctp_tsnmap *, __u32 tsn,
+		     struct sctp_transport *trans);
 
 /* Mark this TSN and all lower as seen. */
 void sctp_tsnmap_skip(struct sctp_tsnmap *map, __u32 tsn);
-- 
cgit v1.2.2


From 6bd0405bb4196b44f1acb7a58f11382cdaf6f7f0 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 5 Jul 2012 15:42:10 +0200
Subject: netfilter: nf_ct_ecache: fix crash with multiple containers, one
 shutting down

Hans reports that he's still hitting:

BUG: unable to handle kernel NULL pointer dereference at 000000000000027c
IP: [<ffffffff813615db>] netlink_has_listeners+0xb/0x60
PGD 0
Oops: 0000 [#3] PREEMPT SMP
CPU 0

It happens when adding a number of containers with do:

nfct_query(h, NFCT_Q_CREATE, ct);

and most likely one namespace shuts down.

this problem was supposed to be fixed by:
70e9942 netfilter: nf_conntrack: make event callback registration per-netns

Still, it was missing one rcu_access_pointer to check if the callback
is set or not.

Reported-by: Hans Schillstrom <hans@schillstrom.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_ecache.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index a88fb6939387..e1ce1048fe5f 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -78,7 +78,7 @@ nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct)
 	struct net *net = nf_ct_net(ct);
 	struct nf_conntrack_ecache *e;
 
-	if (net->ct.nf_conntrack_event_cb == NULL)
+	if (!rcu_access_pointer(net->ct.nf_conntrack_event_cb))
 		return;
 
 	e = nf_ct_ecache_find(ct);
-- 
cgit v1.2.2


From 9e33ce453f8ac8452649802bee1f410319408f4b Mon Sep 17 00:00:00 2001
From: Lin Ming <mlin@ss.pku.edu.cn>
Date: Sat, 7 Jul 2012 18:26:10 +0800
Subject: ipvs: fix oops on NAT reply in br_nf context

IPVS should not reset skb->nf_bridge in FORWARD hook
by calling nf_reset for NAT replies. It triggers oops in
br_nf_forward_finish.

[  579.781508] BUG: unable to handle kernel NULL pointer dereference at 0000000000000004
[  579.781669] IP: [<ffffffff817b1ca5>] br_nf_forward_finish+0x58/0x112
[  579.781792] PGD 218f9067 PUD 0
[  579.781865] Oops: 0000 [#1] SMP
[  579.781945] CPU 0
[  579.781983] Modules linked in:
[  579.782047]
[  579.782080]
[  579.782114] Pid: 4644, comm: qemu Tainted: G        W    3.5.0-rc5-00006-g95e69f9 #282 Hewlett-Packard  /30E8
[  579.782300] RIP: 0010:[<ffffffff817b1ca5>]  [<ffffffff817b1ca5>] br_nf_forward_finish+0x58/0x112
[  579.782455] RSP: 0018:ffff88007b003a98  EFLAGS: 00010287
[  579.782541] RAX: 0000000000000008 RBX: ffff8800762ead00 RCX: 000000000001670a
[  579.782653] RDX: 0000000000000000 RSI: 000000000000000a RDI: ffff8800762ead00
[  579.782845] RBP: ffff88007b003ac8 R08: 0000000000016630 R09: ffff88007b003a90
[  579.782957] R10: ffff88007b0038e8 R11: ffff88002da37540 R12: ffff88002da01a02
[  579.783066] R13: ffff88002da01a80 R14: ffff88002d83c000 R15: ffff88002d82a000
[  579.783177] FS:  0000000000000000(0000) GS:ffff88007b000000(0063) knlGS:00000000f62d1b70
[  579.783306] CS:  0010 DS: 002b ES: 002b CR0: 000000008005003b
[  579.783395] CR2: 0000000000000004 CR3: 00000000218fe000 CR4: 00000000000027f0
[  579.783505] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  579.783684] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[  579.783795] Process qemu (pid: 4644, threadinfo ffff880021b20000, task ffff880021aba760)
[  579.783919] Stack:
[  579.783959]  ffff88007693cedc ffff8800762ead00 ffff88002da01a02 ffff8800762ead00
[  579.784110]  ffff88002da01a02 ffff88002da01a80 ffff88007b003b18 ffffffff817b26c7
[  579.784260]  ffff880080000000 ffffffff81ef59f0 ffff8800762ead00 ffffffff81ef58b0
[  579.784477] Call Trace:
[  579.784523]  <IRQ>
[  579.784562]
[  579.784603]  [<ffffffff817b26c7>] br_nf_forward_ip+0x275/0x2c8
[  579.784707]  [<ffffffff81704b58>] nf_iterate+0x47/0x7d
[  579.784797]  [<ffffffff817ac32e>] ? br_dev_queue_push_xmit+0xae/0xae
[  579.784906]  [<ffffffff81704bfb>] nf_hook_slow+0x6d/0x102
[  579.784995]  [<ffffffff817ac32e>] ? br_dev_queue_push_xmit+0xae/0xae
[  579.785175]  [<ffffffff8187fa95>] ? _raw_write_unlock_bh+0x19/0x1b
[  579.785179]  [<ffffffff817ac417>] __br_forward+0x97/0xa2
[  579.785179]  [<ffffffff817ad366>] br_handle_frame_finish+0x1a6/0x257
[  579.785179]  [<ffffffff817b2386>] br_nf_pre_routing_finish+0x26d/0x2cb
[  579.785179]  [<ffffffff817b2cf0>] br_nf_pre_routing+0x55d/0x5c1
[  579.785179]  [<ffffffff81704b58>] nf_iterate+0x47/0x7d
[  579.785179]  [<ffffffff817ad1c0>] ? br_handle_local_finish+0x44/0x44
[  579.785179]  [<ffffffff81704bfb>] nf_hook_slow+0x6d/0x102
[  579.785179]  [<ffffffff817ad1c0>] ? br_handle_local_finish+0x44/0x44
[  579.785179]  [<ffffffff81551525>] ? sky2_poll+0xb35/0xb54
[  579.785179]  [<ffffffff817ad62a>] br_handle_frame+0x213/0x229
[  579.785179]  [<ffffffff817ad417>] ? br_handle_frame_finish+0x257/0x257
[  579.785179]  [<ffffffff816e3b47>] __netif_receive_skb+0x2b4/0x3f1
[  579.785179]  [<ffffffff816e69fc>] process_backlog+0x99/0x1e2
[  579.785179]  [<ffffffff816e6800>] net_rx_action+0xdf/0x242
[  579.785179]  [<ffffffff8107e8a8>] __do_softirq+0xc1/0x1e0
[  579.785179]  [<ffffffff8135a5ba>] ? trace_hardirqs_off_thunk+0x3a/0x6c
[  579.785179]  [<ffffffff8188812c>] call_softirq+0x1c/0x30

The steps to reproduce as follow,

1. On Host1, setup brige br0(192.168.1.106)
2. Boot a kvm guest(192.168.1.105) on Host1 and start httpd
3. Start IPVS service on Host1
   ipvsadm -A -t 192.168.1.106:80 -s rr
   ipvsadm -a -t 192.168.1.106:80 -r 192.168.1.105:80 -m
4. Run apache benchmark on Host2(192.168.1.101)
   ab -n 1000 http://192.168.1.106/

ip_vs_reply4
  ip_vs_out
    handle_response
      ip_vs_notrack
        nf_reset()
        {
          skb->nf_bridge = NULL;
        }

Actually, IPVS wants in this case just to replace nfct
with untracked version. So replace the nf_reset(skb) call
in ip_vs_notrack() with a nf_conntrack_put(skb->nfct) call.

Signed-off-by: Lin Ming <mlin@ss.pku.edu.cn>
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/ip_vs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index d6146b4811c2..95374d1696a1 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -1425,7 +1425,7 @@ static inline void ip_vs_notrack(struct sk_buff *skb)
 	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
 
 	if (!ct || !nf_ct_is_untracked(ct)) {
-		nf_reset(skb);
+		nf_conntrack_put(skb->nfct);
 		skb->nfct = &nf_ct_untracked_get()->ct_general;
 		skb->nfctinfo = IP_CT_NEW;
 		nf_conntrack_get(skb->nfct);
-- 
cgit v1.2.2